Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 15:12:02 +00:00)
remove some useless virtual and rename some functions in HiveFile
This commit is contained in:
parent 2ef316801c
commit acc7046d54
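The change is mostly mechanical: hasMinMaxIndex/hasSubMinMaxIndex become useFileMinMaxIndex/useSplitMinMaxIndex, the per-format getName() overrides are dropped in favour of a non-virtual getFormatName() derived from getFormat(), plain accessors in IHiveFile lose their virtual, and derived-class overrides drop the redundant leading virtual keyword. A minimal sketch of those two C++ cleanups, using simplified stand-in types rather than the real ClickHouse classes:

#include <iostream>
#include <string>

// Simplified stand-ins for IHiveFile and a concrete file class (illustration only).
struct Base
{
    virtual ~Base() = default;

    // Plain accessors never need dynamic dispatch, so they are not virtual.
    const std::string & getPath() const { return path; }

    // "useXxx" asks whether a format supports an index; loading it is a separate call.
    virtual bool useFileMinMaxIndex() const { return false; }

protected:
    std::string path = "example/path";
};

struct Derived : Base
{
    // 'override' already implies virtual, so the extra keyword is redundant:
    //   old style: virtual bool hasMinMaxIndex() const override;
    bool useFileMinMaxIndex() const override { return true; }
};

int main()
{
    Derived file;
    const Base & base = file;
    // Dispatch is still virtual even though the override dropped the keyword.
    std::cout << std::boolalpha << base.useFileMinMaxIndex() << '\n'; // prints: true
}

Dropping the leading virtual on an override changes nothing at run time; override alone guarantees the function still overrides the base declaration.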
@@ -145,7 +145,7 @@ void HiveOrcFile::prepareColumnMapping()
     }
 }

-bool HiveOrcFile::hasMinMaxIndex() const
+bool HiveOrcFile::useFileMinMaxIndex() const
 {
     return storage_settings->enable_orc_file_minmax_index;
 }
@@ -196,7 +196,7 @@ void HiveOrcFile::loadMinMaxIndex()
     minmax_idx = buildMinMaxIndex(statistics.get());
 }

-bool HiveOrcFile::hasSubMinMaxIndex() const
+bool HiveOrcFile::useSplitMinMaxIndex() const
 {
     return storage_settings->enable_orc_stripe_minmax_index;
 }
@@ -226,7 +226,7 @@ void HiveOrcFile::loadSubMinMaxIndex()
     }
 }

-bool HiveParquetFile::hasSubMinMaxIndex() const
+bool HiveParquetFile::useSplitMinMaxIndex() const
 {
     return storage_settings->enable_parquet_rowgroup_minmax_index;
 }
@@ -76,7 +76,7 @@ public:
     }

     IHiveFile(
-        const FieldVector & values_,
+        const FieldVector & partition_values_,
         const String & namenode_url_,
         const String & path_,
         UInt64 last_modify_time_,
@@ -85,7 +85,7 @@ public:
         const std::shared_ptr<HiveSettings> & storage_settings_,
         ContextPtr context_)
         : WithContext(context_)
-        , partition_values(values_)
+        , partition_values(partition_values_)
         , namenode_url(namenode_url_)
         , path(path_)
         , last_modify_time(last_modify_time_)
@@ -96,56 +96,47 @@ public:
     }
     virtual ~IHiveFile() = default;

-    virtual FileFormat getFormat() const = 0;
+    String getFormatName() const { return String(magic_enum::enum_name(getFormat())); }
+    const String & getPath() const { return path; }
+    UInt64 getLastModTs() const { return last_modify_time; }
+    size_t getSize() const { return size; }
+    const FieldVector & getPartitionValues() const { return partition_values; }
+    const String & getNamenodeUrl() { return namenode_url; }
+    MinMaxIndexPtr getMinMaxIndex() const { return minmax_idx; }
+    const std::vector<MinMaxIndexPtr> & getSubMinMaxIndexes() const { return sub_minmax_idxes; }

-    virtual String getName() const = 0;
+    const std::unordered_set<int> & getSkipSplits() const { return skip_splits; }
+    void setSkipSplits(const std::unordered_set<int> & skip_splits_) { skip_splits = skip_splits_; }

-    virtual String getPath() const { return path; }
-
-    virtual FieldVector getPartitionValues() const { return partition_values; }
-
-    virtual String getNamenodeUrl() { return namenode_url; }
-
-    virtual bool hasMinMaxIndex() const { return false; }
-
-    virtual void loadMinMaxIndex()
-    {
-        throw Exception("Method loadMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED);
-    }
-
-    virtual MinMaxIndexPtr getMinMaxIndex() const { return minmax_idx; }
-
-    // Do hive file contains sub-file level minmax index?
-    virtual bool hasSubMinMaxIndex() const { return false; }
-
-    virtual void loadSubMinMaxIndex()
-    {
-        throw Exception("Method loadSubMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED);
-    }
-
-    virtual const std::vector<MinMaxIndexPtr> & getSubMinMaxIndexes() const { return sub_minmax_idxes; }
-
-    virtual void setSkipSplits(const std::unordered_set<int> & skip_splits_) { skip_splits = skip_splits_; }
-
-    virtual const std::unordered_set<int> & getSkipSplits() const { return skip_splits; }
-
-    inline std::string describeMinMaxIndex(const MinMaxIndexPtr & idx) const
+    String describeMinMaxIndex(const MinMaxIndexPtr & idx) const
     {
         if (!idx)
             return "";

-        std::vector<std::string> strs;
+        std::vector<String> strs;
         strs.reserve(index_names_and_types.size());
         size_t i = 0;
         for (const auto & name_type : index_names_and_types)
-        {
             strs.push_back(name_type.name + ":" + name_type.type->getName() + idx->hyperrectangle[i++].toString());
-        }
         return boost::algorithm::join(strs, "|");
     }

-    inline UInt64 getLastModTs() const { return last_modify_time; }
-    inline size_t getSize() const { return size; }
+    virtual FileFormat getFormat() const = 0;
+
+    virtual bool useFileMinMaxIndex() const { return false; }
+
+    virtual void loadMinMaxIndex()
+    {
+        throw Exception("Method loadMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+    /// If hive query could use contains sub-file level minmax index?
+    virtual bool useSplitMinMaxIndex() const { return false; }
+
+    virtual void loadSubMinMaxIndex()
+    {
+        throw Exception("Method loadSubMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
+    }

 protected:
     FieldVector partition_values;
@@ -168,7 +159,7 @@ class HiveTextFile : public IHiveFile
 {
 public:
     HiveTextFile(
-        const FieldVector & values_,
+        const FieldVector & partition_values_,
         const String & namenode_url_,
         const String & path_,
         UInt64 last_modify_time_,
@@ -176,19 +167,18 @@ public:
         const NamesAndTypesList & index_names_and_types_,
         const std::shared_ptr<HiveSettings> & hive_settings_,
         ContextPtr context_)
-        : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
+        : IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
     {
     }

     virtual FileFormat getFormat() const override { return FileFormat::TEXT; }
-    virtual String getName() const override { return "TEXT"; }
 };

 class HiveOrcFile : public IHiveFile
 {
 public:
     HiveOrcFile(
-        const FieldVector & values_,
+        const FieldVector & partition_values_,
         const String & namenode_url_,
         const String & path_,
         UInt64 last_modify_time_,
@@ -196,23 +186,22 @@ public:
         const NamesAndTypesList & index_names_and_types_,
         const std::shared_ptr<HiveSettings> & hive_settings_,
         ContextPtr context_)
-        : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
+        : IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
     {
     }

-    virtual FileFormat getFormat() const override { return FileFormat::ORC; }
-    virtual String getName() const override { return "ORC"; }
-    virtual bool hasMinMaxIndex() const override;
-    virtual void loadMinMaxIndex() override;
+    FileFormat getFormat() const override { return FileFormat::ORC; }
+    bool useFileMinMaxIndex() const override;
+    void loadMinMaxIndex() override;

-    virtual bool hasSubMinMaxIndex() const override;
-    virtual void loadSubMinMaxIndex() override;
+    bool useSplitMinMaxIndex() const override;
+    void loadSubMinMaxIndex() override;

-protected:
-    virtual std::unique_ptr<MinMaxIndex> buildMinMaxIndex(const orc::Statistics * statistics);
-    virtual Range buildRange(const orc::ColumnStatistics * col_stats);
-    virtual void prepareReader();
-    virtual void prepareColumnMapping();
+private:
+    std::unique_ptr<MinMaxIndex> buildMinMaxIndex(const orc::Statistics * statistics);
+    Range buildRange(const orc::ColumnStatistics * col_stats);
+    void prepareReader();
+    void prepareColumnMapping();

     std::unique_ptr<ReadBufferFromHDFS> in;
     std::unique_ptr<arrow::adapters::orc::ORCFileReader> reader;
@@ -223,7 +212,7 @@ class HiveParquetFile : public IHiveFile
 {
 public:
     HiveParquetFile(
-        const FieldVector & values_,
+        const FieldVector & partition_values_,
         const String & namenode_url_,
         const String & path_,
         UInt64 last_modify_time_,
@@ -231,18 +220,17 @@ public:
         const NamesAndTypesList & index_names_and_types_,
         const std::shared_ptr<HiveSettings> & hive_settings_,
         ContextPtr context_)
-        : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
+        : IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
     {
     }

-    virtual FileFormat getFormat() const override { return FileFormat::PARQUET; }
-    virtual String getName() const override { return "PARQUET"; }
+    FileFormat getFormat() const override { return FileFormat::PARQUET; }

-    virtual bool hasSubMinMaxIndex() const override;
-    virtual void loadSubMinMaxIndex() override;
+    bool useSplitMinMaxIndex() const override;
+    void loadSubMinMaxIndex() override;

-protected:
-    virtual void prepareReader();
+private:
+    void prepareReader();

     std::unique_ptr<ReadBufferFromHDFS> in;
     std::unique_ptr<parquet::arrow::FileReader> reader;
@@ -543,7 +543,7 @@ HiveFilePtr StorageHive::createHiveFileIfNeeded(

     /// Load file level minmax index and apply
     const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr);
-    if (hive_file->hasMinMaxIndex())
+    if (hive_file->useFileMinMaxIndex())
     {
         hive_file->loadMinMaxIndex();
         if (!hivefile_key_condition.checkInHyperrectangle(hive_file->getMinMaxIndex()->hyperrectangle, hivefile_name_types.getTypes())
@@ -556,7 +556,7 @@ HiveFilePtr StorageHive::createHiveFileIfNeeded(
     }

     /// Load sub-file level minmax index and apply
-    if (hive_file->hasSubMinMaxIndex())
+    if (hive_file->useSplitMinMaxIndex())
     {
         std::unordered_set<int> skip_splits;
         hive_file->loadSubMinMaxIndex();
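The two StorageHive hunks above show the calling pattern the renamed predicates are meant to read as: a cheap capability check gates the expensive index load. A self-contained sketch of that gating, with simplified stand-in types rather than the real ClickHouse classes (the real defaults throw a ClickHouse Exception with NOT_IMPLEMENTED instead of std::logic_error):

#include <iostream>
#include <stdexcept>

// Stand-in for the IHiveFile hooks shown in the diff (illustration only).
struct HiveFileLike
{
    virtual ~HiveFileLike() = default;
    virtual bool useFileMinMaxIndex() const { return false; }
    virtual bool useSplitMinMaxIndex() const { return false; }
    virtual void loadMinMaxIndex() { throw std::logic_error("loadMinMaxIndex not supported"); }
    virtual void loadSubMinMaxIndex() { throw std::logic_error("loadSubMinMaxIndex not supported"); }
};

// ORC-like file: supports both file-level and split (stripe) level indexes.
struct OrcLike : HiveFileLike
{
    bool useFileMinMaxIndex() const override { return true; }
    bool useSplitMinMaxIndex() const override { return true; }
    void loadMinMaxIndex() override { std::cout << "load file-level minmax index\n"; }
    void loadSubMinMaxIndex() override { std::cout << "load per-split minmax indexes\n"; }
};

// TEXT-like file: keeps the defaults, so the caller never invokes the loaders.
struct TextLike : HiveFileLike {};

// Mirrors the gating shown in StorageHive::createHiveFileIfNeeded: an index is
// loaded only when the format says it can be used, then used to prune files/splits.
void pruneWithIndexes(HiveFileLike & file)
{
    if (file.useFileMinMaxIndex())
        file.loadMinMaxIndex();      // then check its hyperrectangle against the query
    if (file.useSplitMinMaxIndex())
        file.loadSubMinMaxIndex();   // then collect the splits that can be skipped
}

int main()
{
    OrcLike orc;
    TextLike text;
    pruneWithIndexes(orc);  // loads both kinds of index
    pruneWithIndexes(text); // loads nothing
}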