Fix performance of short queries with a large number of columns

This commit is contained in:
Anton Popov 2021-08-02 15:03:55 +03:00
parent cc3ed12ed6
commit fc9d72e75d
7 changed files with 10 additions and 6 deletions

View File

@ -468,7 +468,7 @@ Block MergeTreeBaseSelectProcessor::transformHeader(
std::unique_ptr<MergeTreeBlockSizePredictor> MergeTreeBaseSelectProcessor::getSizePredictor(
const MergeTreeData::DataPartPtr & data_part,
const MergeTreeReadTaskColumns & task_columns,
const StorageMetadataPtr & metadata_snapshot)
const Block & sample_block)
{
const auto & required_column_names = task_columns.columns.getNames();
const auto & required_pre_column_names = task_columns.pre_columns.getNames();
@ -476,7 +476,7 @@ std::unique_ptr<MergeTreeBlockSizePredictor> MergeTreeBaseSelectProcessor::getSi
complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());
return std::make_unique<MergeTreeBlockSizePredictor>(
data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock());
data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block);
}
MergeTreeBaseSelectProcessor::~MergeTreeBaseSelectProcessor() = default;

View File

@ -40,7 +40,7 @@ public:
static std::unique_ptr<MergeTreeBlockSizePredictor> getSizePredictor(
const MergeTreeData::DataPartPtr & data_part,
const MergeTreeReadTaskColumns & task_columns,
const StorageMetadataPtr & metadata_snapshot);
const Block & sample_block);
protected:
Chunk generate() final;

View File

@ -18,7 +18,7 @@ try
}
auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
: getSizePredictor(data_part, task_columns, metadata_snapshot);
: getSizePredictor(data_part, task_columns, sample_block);
MarkRanges mark_ranges_for_task;
/// If we need to read few rows, set one range per task to reduce number of read data.

View File

@ -231,7 +231,7 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
auto task_columns = getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns);
auto size_predictor = !predict_block_size_bytes ? nullptr
: MergeTreeBaseSelectProcessor::getSizePredictor(part.data_part, task_columns, metadata_snapshot);
: MergeTreeBaseSelectProcessor::getSizePredictor(part.data_part, task_columns, sample_block);
per_part_size_predictor.emplace_back(std::move(size_predictor));

View File

@ -27,7 +27,7 @@ try
all_mark_ranges.pop_back();
auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
: getSizePredictor(data_part, task_columns, metadata_snapshot);
: getSizePredictor(data_part, task_columns, sample_block);
task = std::make_unique<MergeTreeReadTask>(
data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set,

View File

@ -31,6 +31,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
reader_settings_, use_uncompressed_cache_, virt_column_names_},
required_columns{std::move(required_columns_)},
data_part{owned_data_part_},
sample_block(metadata_snapshot_->getSampleBlock()),
all_mark_ranges(std::move(mark_ranges_)),
part_index_in_query(part_index_in_query_),
has_limit_below_one_block(has_limit_below_one_block_),

View File

@ -51,6 +51,9 @@ protected:
/// Data part will not be removed if the pointer owns it
MergeTreeData::DataPartPtr data_part;
/// Cache getSampleBlock call, which might be heavy.
Block sample_block;
/// Mark ranges we should read (in ascending order)
MarkRanges all_mark_ranges;
/// Value of _part_index virtual column (used only in SelectExecutor)