mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Fix performance of short queries with a large number of columns
This commit is contained in:
parent
cc3ed12ed6
commit
fc9d72e75d
@ -468,7 +468,7 @@ Block MergeTreeBaseSelectProcessor::transformHeader(
|
||||
std::unique_ptr<MergeTreeBlockSizePredictor> MergeTreeBaseSelectProcessor::getSizePredictor(
|
||||
const MergeTreeData::DataPartPtr & data_part,
|
||||
const MergeTreeReadTaskColumns & task_columns,
|
||||
const StorageMetadataPtr & metadata_snapshot)
|
||||
const Block & sample_block)
|
||||
{
|
||||
const auto & required_column_names = task_columns.columns.getNames();
|
||||
const auto & required_pre_column_names = task_columns.pre_columns.getNames();
|
||||
@ -476,7 +476,7 @@ std::unique_ptr<MergeTreeBlockSizePredictor> MergeTreeBaseSelectProcessor::getSi
|
||||
complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());
|
||||
|
||||
return std::make_unique<MergeTreeBlockSizePredictor>(
|
||||
data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock());
|
||||
data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block);
|
||||
}
|
||||
|
||||
MergeTreeBaseSelectProcessor::~MergeTreeBaseSelectProcessor() = default;
|
||||
|
@ -40,7 +40,7 @@ public:
|
||||
static std::unique_ptr<MergeTreeBlockSizePredictor> getSizePredictor(
|
||||
const MergeTreeData::DataPartPtr & data_part,
|
||||
const MergeTreeReadTaskColumns & task_columns,
|
||||
const StorageMetadataPtr & metadata_snapshot);
|
||||
const Block & sample_block);
|
||||
|
||||
protected:
|
||||
Chunk generate() final;
|
||||
|
@ -18,7 +18,7 @@ try
|
||||
}
|
||||
|
||||
auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
|
||||
: getSizePredictor(data_part, task_columns, metadata_snapshot);
|
||||
: getSizePredictor(data_part, task_columns, sample_block);
|
||||
|
||||
MarkRanges mark_ranges_for_task;
|
||||
/// If we need to read only a few rows, set one range per task to reduce the amount of data read.
|
||||
|
@ -231,7 +231,7 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
|
||||
auto task_columns = getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns);
|
||||
|
||||
auto size_predictor = !predict_block_size_bytes ? nullptr
|
||||
: MergeTreeBaseSelectProcessor::getSizePredictor(part.data_part, task_columns, metadata_snapshot);
|
||||
: MergeTreeBaseSelectProcessor::getSizePredictor(part.data_part, task_columns, sample_block);
|
||||
|
||||
per_part_size_predictor.emplace_back(std::move(size_predictor));
|
||||
|
||||
|
@ -27,7 +27,7 @@ try
|
||||
all_mark_ranges.pop_back();
|
||||
|
||||
auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
|
||||
: getSizePredictor(data_part, task_columns, metadata_snapshot);
|
||||
: getSizePredictor(data_part, task_columns, sample_block);
|
||||
|
||||
task = std::make_unique<MergeTreeReadTask>(
|
||||
data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set,
|
||||
|
@ -31,6 +31,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
|
||||
reader_settings_, use_uncompressed_cache_, virt_column_names_},
|
||||
required_columns{std::move(required_columns_)},
|
||||
data_part{owned_data_part_},
|
||||
sample_block(metadata_snapshot_->getSampleBlock()),
|
||||
all_mark_ranges(std::move(mark_ranges_)),
|
||||
part_index_in_query(part_index_in_query_),
|
||||
has_limit_below_one_block(has_limit_below_one_block_),
|
||||
|
@ -51,6 +51,9 @@ protected:
|
||||
/// Data part will not be removed if the pointer owns it
|
||||
MergeTreeData::DataPartPtr data_part;
|
||||
|
||||
/// Cached result of getSampleBlock(), which might be expensive to compute.
|
||||
Block sample_block;
|
||||
|
||||
/// Mark ranges we should read (in ascending order)
|
||||
MarkRanges all_mark_ranges;
|
||||
/// Value of _part_index virtual column (used only in SelectExecutor)
|
||||
|
Loading…
Reference in New Issue
Block a user