mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
remove some useless virtual and rename some functions in HiveFile
This commit is contained in:
parent
2ef316801c
commit
acc7046d54
@ -145,7 +145,7 @@ void HiveOrcFile::prepareColumnMapping()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HiveOrcFile::hasMinMaxIndex() const
|
bool HiveOrcFile::useFileMinMaxIndex() const
|
||||||
{
|
{
|
||||||
return storage_settings->enable_orc_file_minmax_index;
|
return storage_settings->enable_orc_file_minmax_index;
|
||||||
}
|
}
|
||||||
@ -196,7 +196,7 @@ void HiveOrcFile::loadMinMaxIndex()
|
|||||||
minmax_idx = buildMinMaxIndex(statistics.get());
|
minmax_idx = buildMinMaxIndex(statistics.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HiveOrcFile::hasSubMinMaxIndex() const
|
bool HiveOrcFile::useSplitMinMaxIndex() const
|
||||||
{
|
{
|
||||||
return storage_settings->enable_orc_stripe_minmax_index;
|
return storage_settings->enable_orc_stripe_minmax_index;
|
||||||
}
|
}
|
||||||
@ -226,7 +226,7 @@ void HiveOrcFile::loadSubMinMaxIndex()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HiveParquetFile::hasSubMinMaxIndex() const
|
bool HiveParquetFile::useSplitMinMaxIndex() const
|
||||||
{
|
{
|
||||||
return storage_settings->enable_parquet_rowgroup_minmax_index;
|
return storage_settings->enable_parquet_rowgroup_minmax_index;
|
||||||
}
|
}
|
||||||
|
@ -76,7 +76,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
IHiveFile(
|
IHiveFile(
|
||||||
const FieldVector & values_,
|
const FieldVector & partition_values_,
|
||||||
const String & namenode_url_,
|
const String & namenode_url_,
|
||||||
const String & path_,
|
const String & path_,
|
||||||
UInt64 last_modify_time_,
|
UInt64 last_modify_time_,
|
||||||
@ -85,7 +85,7 @@ public:
|
|||||||
const std::shared_ptr<HiveSettings> & storage_settings_,
|
const std::shared_ptr<HiveSettings> & storage_settings_,
|
||||||
ContextPtr context_)
|
ContextPtr context_)
|
||||||
: WithContext(context_)
|
: WithContext(context_)
|
||||||
, partition_values(values_)
|
, partition_values(partition_values_)
|
||||||
, namenode_url(namenode_url_)
|
, namenode_url(namenode_url_)
|
||||||
, path(path_)
|
, path(path_)
|
||||||
, last_modify_time(last_modify_time_)
|
, last_modify_time(last_modify_time_)
|
||||||
@ -96,56 +96,47 @@ public:
|
|||||||
}
|
}
|
||||||
virtual ~IHiveFile() = default;
|
virtual ~IHiveFile() = default;
|
||||||
|
|
||||||
virtual FileFormat getFormat() const = 0;
|
String getFormatName() const { return String(magic_enum::enum_name(getFormat())); }
|
||||||
|
const String & getPath() const { return path; }
|
||||||
|
UInt64 getLastModTs() const { return last_modify_time; }
|
||||||
|
size_t getSize() const { return size; }
|
||||||
|
const FieldVector & getPartitionValues() const { return partition_values; }
|
||||||
|
const String & getNamenodeUrl() { return namenode_url; }
|
||||||
|
MinMaxIndexPtr getMinMaxIndex() const { return minmax_idx; }
|
||||||
|
const std::vector<MinMaxIndexPtr> & getSubMinMaxIndexes() const { return sub_minmax_idxes; }
|
||||||
|
|
||||||
virtual String getName() const = 0;
|
const std::unordered_set<int> & getSkipSplits() const { return skip_splits; }
|
||||||
|
void setSkipSplits(const std::unordered_set<int> & skip_splits_) { skip_splits = skip_splits_; }
|
||||||
|
|
||||||
virtual String getPath() const { return path; }
|
String describeMinMaxIndex(const MinMaxIndexPtr & idx) const
|
||||||
|
|
||||||
virtual FieldVector getPartitionValues() const { return partition_values; }
|
|
||||||
|
|
||||||
virtual String getNamenodeUrl() { return namenode_url; }
|
|
||||||
|
|
||||||
virtual bool hasMinMaxIndex() const { return false; }
|
|
||||||
|
|
||||||
virtual void loadMinMaxIndex()
|
|
||||||
{
|
|
||||||
throw Exception("Method loadMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual MinMaxIndexPtr getMinMaxIndex() const { return minmax_idx; }
|
|
||||||
|
|
||||||
// Does the hive file contain a sub-file level minmax index?
|
|
||||||
virtual bool hasSubMinMaxIndex() const { return false; }
|
|
||||||
|
|
||||||
virtual void loadSubMinMaxIndex()
|
|
||||||
{
|
|
||||||
throw Exception("Method loadSubMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual const std::vector<MinMaxIndexPtr> & getSubMinMaxIndexes() const { return sub_minmax_idxes; }
|
|
||||||
|
|
||||||
virtual void setSkipSplits(const std::unordered_set<int> & skip_splits_) { skip_splits = skip_splits_; }
|
|
||||||
|
|
||||||
virtual const std::unordered_set<int> & getSkipSplits() const { return skip_splits; }
|
|
||||||
|
|
||||||
inline std::string describeMinMaxIndex(const MinMaxIndexPtr & idx) const
|
|
||||||
{
|
{
|
||||||
if (!idx)
|
if (!idx)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
std::vector<std::string> strs;
|
std::vector<String> strs;
|
||||||
strs.reserve(index_names_and_types.size());
|
strs.reserve(index_names_and_types.size());
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (const auto & name_type : index_names_and_types)
|
for (const auto & name_type : index_names_and_types)
|
||||||
{
|
|
||||||
strs.push_back(name_type.name + ":" + name_type.type->getName() + idx->hyperrectangle[i++].toString());
|
strs.push_back(name_type.name + ":" + name_type.type->getName() + idx->hyperrectangle[i++].toString());
|
||||||
}
|
|
||||||
return boost::algorithm::join(strs, "|");
|
return boost::algorithm::join(strs, "|");
|
||||||
}
|
}
|
||||||
|
|
||||||
inline UInt64 getLastModTs() const { return last_modify_time; }
|
virtual FileFormat getFormat() const = 0;
|
||||||
inline size_t getSize() const { return size; }
|
|
||||||
|
virtual bool useFileMinMaxIndex() const { return false; }
|
||||||
|
|
||||||
|
virtual void loadMinMaxIndex()
|
||||||
|
{
|
||||||
|
throw Exception("Method loadMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether a hive query could use the sub-file level minmax index.
|
||||||
|
virtual bool useSplitMinMaxIndex() const { return false; }
|
||||||
|
|
||||||
|
virtual void loadSubMinMaxIndex()
|
||||||
|
{
|
||||||
|
throw Exception("Method loadSubMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
FieldVector partition_values;
|
FieldVector partition_values;
|
||||||
@ -168,7 +159,7 @@ class HiveTextFile : public IHiveFile
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
HiveTextFile(
|
HiveTextFile(
|
||||||
const FieldVector & values_,
|
const FieldVector & partition_values_,
|
||||||
const String & namenode_url_,
|
const String & namenode_url_,
|
||||||
const String & path_,
|
const String & path_,
|
||||||
UInt64 last_modify_time_,
|
UInt64 last_modify_time_,
|
||||||
@ -176,19 +167,18 @@ public:
|
|||||||
const NamesAndTypesList & index_names_and_types_,
|
const NamesAndTypesList & index_names_and_types_,
|
||||||
const std::shared_ptr<HiveSettings> & hive_settings_,
|
const std::shared_ptr<HiveSettings> & hive_settings_,
|
||||||
ContextPtr context_)
|
ContextPtr context_)
|
||||||
: IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
|
: IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual FileFormat getFormat() const override { return FileFormat::TEXT; }
|
virtual FileFormat getFormat() const override { return FileFormat::TEXT; }
|
||||||
virtual String getName() const override { return "TEXT"; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class HiveOrcFile : public IHiveFile
|
class HiveOrcFile : public IHiveFile
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
HiveOrcFile(
|
HiveOrcFile(
|
||||||
const FieldVector & values_,
|
const FieldVector & partition_values_,
|
||||||
const String & namenode_url_,
|
const String & namenode_url_,
|
||||||
const String & path_,
|
const String & path_,
|
||||||
UInt64 last_modify_time_,
|
UInt64 last_modify_time_,
|
||||||
@ -196,23 +186,22 @@ public:
|
|||||||
const NamesAndTypesList & index_names_and_types_,
|
const NamesAndTypesList & index_names_and_types_,
|
||||||
const std::shared_ptr<HiveSettings> & hive_settings_,
|
const std::shared_ptr<HiveSettings> & hive_settings_,
|
||||||
ContextPtr context_)
|
ContextPtr context_)
|
||||||
: IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
|
: IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual FileFormat getFormat() const override { return FileFormat::ORC; }
|
FileFormat getFormat() const override { return FileFormat::ORC; }
|
||||||
virtual String getName() const override { return "ORC"; }
|
bool useFileMinMaxIndex() const override;
|
||||||
virtual bool hasMinMaxIndex() const override;
|
void loadMinMaxIndex() override;
|
||||||
virtual void loadMinMaxIndex() override;
|
|
||||||
|
|
||||||
virtual bool hasSubMinMaxIndex() const override;
|
bool useSplitMinMaxIndex() const override;
|
||||||
virtual void loadSubMinMaxIndex() override;
|
void loadSubMinMaxIndex() override;
|
||||||
|
|
||||||
protected:
|
private:
|
||||||
virtual std::unique_ptr<MinMaxIndex> buildMinMaxIndex(const orc::Statistics * statistics);
|
std::unique_ptr<MinMaxIndex> buildMinMaxIndex(const orc::Statistics * statistics);
|
||||||
virtual Range buildRange(const orc::ColumnStatistics * col_stats);
|
Range buildRange(const orc::ColumnStatistics * col_stats);
|
||||||
virtual void prepareReader();
|
void prepareReader();
|
||||||
virtual void prepareColumnMapping();
|
void prepareColumnMapping();
|
||||||
|
|
||||||
std::unique_ptr<ReadBufferFromHDFS> in;
|
std::unique_ptr<ReadBufferFromHDFS> in;
|
||||||
std::unique_ptr<arrow::adapters::orc::ORCFileReader> reader;
|
std::unique_ptr<arrow::adapters::orc::ORCFileReader> reader;
|
||||||
@ -223,7 +212,7 @@ class HiveParquetFile : public IHiveFile
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
HiveParquetFile(
|
HiveParquetFile(
|
||||||
const FieldVector & values_,
|
const FieldVector & partition_values_,
|
||||||
const String & namenode_url_,
|
const String & namenode_url_,
|
||||||
const String & path_,
|
const String & path_,
|
||||||
UInt64 last_modify_time_,
|
UInt64 last_modify_time_,
|
||||||
@ -231,18 +220,17 @@ public:
|
|||||||
const NamesAndTypesList & index_names_and_types_,
|
const NamesAndTypesList & index_names_and_types_,
|
||||||
const std::shared_ptr<HiveSettings> & hive_settings_,
|
const std::shared_ptr<HiveSettings> & hive_settings_,
|
||||||
ContextPtr context_)
|
ContextPtr context_)
|
||||||
: IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
|
: IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual FileFormat getFormat() const override { return FileFormat::PARQUET; }
|
FileFormat getFormat() const override { return FileFormat::PARQUET; }
|
||||||
virtual String getName() const override { return "PARQUET"; }
|
|
||||||
|
|
||||||
virtual bool hasSubMinMaxIndex() const override;
|
bool useSplitMinMaxIndex() const override;
|
||||||
virtual void loadSubMinMaxIndex() override;
|
void loadSubMinMaxIndex() override;
|
||||||
|
|
||||||
protected:
|
private:
|
||||||
virtual void prepareReader();
|
void prepareReader();
|
||||||
|
|
||||||
std::unique_ptr<ReadBufferFromHDFS> in;
|
std::unique_ptr<ReadBufferFromHDFS> in;
|
||||||
std::unique_ptr<parquet::arrow::FileReader> reader;
|
std::unique_ptr<parquet::arrow::FileReader> reader;
|
||||||
|
@ -543,7 +543,7 @@ HiveFilePtr StorageHive::createHiveFileIfNeeded(
|
|||||||
|
|
||||||
/// Load file level minmax index and apply
|
/// Load file level minmax index and apply
|
||||||
const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr);
|
const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr);
|
||||||
if (hive_file->hasMinMaxIndex())
|
if (hive_file->useFileMinMaxIndex())
|
||||||
{
|
{
|
||||||
hive_file->loadMinMaxIndex();
|
hive_file->loadMinMaxIndex();
|
||||||
if (!hivefile_key_condition.checkInHyperrectangle(hive_file->getMinMaxIndex()->hyperrectangle, hivefile_name_types.getTypes())
|
if (!hivefile_key_condition.checkInHyperrectangle(hive_file->getMinMaxIndex()->hyperrectangle, hivefile_name_types.getTypes())
|
||||||
@ -556,7 +556,7 @@ HiveFilePtr StorageHive::createHiveFileIfNeeded(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Load sub-file level minmax index and apply
|
/// Load sub-file level minmax index and apply
|
||||||
if (hive_file->hasSubMinMaxIndex())
|
if (hive_file->useSplitMinMaxIndex())
|
||||||
{
|
{
|
||||||
std::unordered_set<int> skip_splits;
|
std::unordered_set<int> skip_splits;
|
||||||
hive_file->loadSubMinMaxIndex();
|
hive_file->loadSubMinMaxIndex();
|
||||||
|
Loading…
Reference in New Issue
Block a user