diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index 3f4260d9f9e..cc2687415ff 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -145,7 +145,7 @@ void HiveOrcFile::prepareColumnMapping() } } -bool HiveOrcFile::hasMinMaxIndex() const +bool HiveOrcFile::useFileMinMaxIndex() const { return storage_settings->enable_orc_file_minmax_index; } @@ -196,7 +196,7 @@ void HiveOrcFile::loadMinMaxIndex() minmax_idx = buildMinMaxIndex(statistics.get()); } -bool HiveOrcFile::hasSubMinMaxIndex() const +bool HiveOrcFile::useSplitMinMaxIndex() const { return storage_settings->enable_orc_stripe_minmax_index; } @@ -226,7 +226,7 @@ void HiveOrcFile::loadSubMinMaxIndex() } } -bool HiveParquetFile::hasSubMinMaxIndex() const +bool HiveParquetFile::useSplitMinMaxIndex() const { return storage_settings->enable_parquet_rowgroup_minmax_index; } diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 74f893a073f..6a25fa3698b 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -76,7 +76,7 @@ public: } IHiveFile( - const FieldVector & values_, + const FieldVector & partition_values_, const String & namenode_url_, const String & path_, UInt64 last_modify_time_, @@ -85,7 +85,7 @@ public: const std::shared_ptr & storage_settings_, ContextPtr context_) : WithContext(context_) - , partition_values(values_) + , partition_values(partition_values_) , namenode_url(namenode_url_) , path(path_) , last_modify_time(last_modify_time_) @@ -96,56 +96,47 @@ public: } virtual ~IHiveFile() = default; - virtual FileFormat getFormat() const = 0; + String getFormatName() const { return String(magic_enum::enum_name(getFormat())); } + const String & getPath() const { return path; } + UInt64 getLastModTs() const { return last_modify_time; } + size_t getSize() const { return size; } + const FieldVector & getPartitionValues() const { return partition_values; } + const String & getNamenodeUrl() { return 
namenode_url; } + MinMaxIndexPtr getMinMaxIndex() const { return minmax_idx; } + const std::vector & getSubMinMaxIndexes() const { return sub_minmax_idxes; } - virtual String getName() const = 0; + const std::unordered_set & getSkipSplits() const { return skip_splits; } + void setSkipSplits(const std::unordered_set & skip_splits_) { skip_splits = skip_splits_; } - virtual String getPath() const { return path; } - - virtual FieldVector getPartitionValues() const { return partition_values; } - - virtual String getNamenodeUrl() { return namenode_url; } - - virtual bool hasMinMaxIndex() const { return false; } - - virtual void loadMinMaxIndex() - { - throw Exception("Method loadMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - virtual MinMaxIndexPtr getMinMaxIndex() const { return minmax_idx; } - - // Do hive file contains sub-file level minmax index? - virtual bool hasSubMinMaxIndex() const { return false; } - - virtual void loadSubMinMaxIndex() - { - throw Exception("Method loadSubMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - virtual const std::vector & getSubMinMaxIndexes() const { return sub_minmax_idxes; } - - virtual void setSkipSplits(const std::unordered_set & skip_splits_) { skip_splits = skip_splits_; } - - virtual const std::unordered_set & getSkipSplits() const { return skip_splits; } - - inline std::string describeMinMaxIndex(const MinMaxIndexPtr & idx) const + String describeMinMaxIndex(const MinMaxIndexPtr & idx) const { if (!idx) return ""; - std::vector strs; + std::vector strs; strs.reserve(index_names_and_types.size()); size_t i = 0; for (const auto & name_type : index_names_and_types) - { strs.push_back(name_type.name + ":" + name_type.type->getName() + idx->hyperrectangle[i++].toString()); - } return boost::algorithm::join(strs, "|"); } - inline UInt64 getLastModTs() const { return last_modify_time; } - inline size_t getSize() const { return size; } + 
virtual FileFormat getFormat() const = 0; + + virtual bool useFileMinMaxIndex() const { return false; } + + virtual void loadMinMaxIndex() + { + throw Exception("Method loadMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED); + } + + /// Whether a hive query may use the sub-file level minmax index of this file. + virtual bool useSplitMinMaxIndex() const { return false; } + + virtual void loadSubMinMaxIndex() + { + throw Exception("Method loadSubMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED); + } protected: FieldVector partition_values; @@ -168,7 +159,7 @@ class HiveTextFile : public IHiveFile { public: HiveTextFile( - const FieldVector & values_, + const FieldVector & partition_values_, const String & namenode_url_, const String & path_, UInt64 last_modify_time_, @@ -176,19 +167,18 @@ public: const NamesAndTypesList & index_names_and_types_, const std::shared_ptr & hive_settings_, ContextPtr context_) - : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) + : IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) { } virtual FileFormat getFormat() const override { return FileFormat::TEXT; } - virtual String getName() const override { return "TEXT"; } }; class HiveOrcFile : public IHiveFile { public: HiveOrcFile( - const FieldVector & values_, + const FieldVector & partition_values_, const String & namenode_url_, const String & path_, UInt64 last_modify_time_, @@ -196,23 +186,22 @@ public: const NamesAndTypesList & index_names_and_types_, const std::shared_ptr & hive_settings_, ContextPtr context_) - : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) + : IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) { } - virtual 
FileFormat getFormat() const override { return FileFormat::ORC; } - virtual String getName() const override { return "ORC"; } - virtual bool hasMinMaxIndex() const override; - virtual void loadMinMaxIndex() override; + FileFormat getFormat() const override { return FileFormat::ORC; } + bool useFileMinMaxIndex() const override; + void loadMinMaxIndex() override; - virtual bool hasSubMinMaxIndex() const override; - virtual void loadSubMinMaxIndex() override; + bool useSplitMinMaxIndex() const override; + void loadSubMinMaxIndex() override; -protected: - virtual std::unique_ptr buildMinMaxIndex(const orc::Statistics * statistics); - virtual Range buildRange(const orc::ColumnStatistics * col_stats); - virtual void prepareReader(); - virtual void prepareColumnMapping(); +private: + std::unique_ptr buildMinMaxIndex(const orc::Statistics * statistics); + Range buildRange(const orc::ColumnStatistics * col_stats); + void prepareReader(); + void prepareColumnMapping(); std::unique_ptr in; std::unique_ptr reader; @@ -223,7 +212,7 @@ class HiveParquetFile : public IHiveFile { public: HiveParquetFile( - const FieldVector & values_, + const FieldVector & partition_values_, const String & namenode_url_, const String & path_, UInt64 last_modify_time_, @@ -231,18 +220,17 @@ public: const NamesAndTypesList & index_names_and_types_, const std::shared_ptr & hive_settings_, ContextPtr context_) - : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) + : IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) { } - virtual FileFormat getFormat() const override { return FileFormat::PARQUET; } - virtual String getName() const override { return "PARQUET"; } + FileFormat getFormat() const override { return FileFormat::PARQUET; } - virtual bool hasSubMinMaxIndex() const override; - virtual void loadSubMinMaxIndex() override; + bool useSplitMinMaxIndex() 
const override; + void loadSubMinMaxIndex() override; -protected: - virtual void prepareReader(); +private: + void prepareReader(); std::unique_ptr in; std::unique_ptr reader; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index b4b0862f5d4..c11db119ab7 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -543,7 +543,7 @@ HiveFilePtr StorageHive::createHiveFileIfNeeded( /// Load file level minmax index and apply const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr); - if (hive_file->hasMinMaxIndex()) + if (hive_file->useFileMinMaxIndex()) { hive_file->loadMinMaxIndex(); if (!hivefile_key_condition.checkInHyperrectangle(hive_file->getMinMaxIndex()->hyperrectangle, hivefile_name_types.getTypes()) @@ -556,7 +556,7 @@ HiveFilePtr StorageHive::createHiveFileIfNeeded( } /// Load sub-file level minmax index and apply - if (hive_file->hasSubMinMaxIndex()) + if (hive_file->useSplitMinMaxIndex()) { std::unordered_set skip_splits; hive_file->loadSubMinMaxIndex();