use a better range begin in virtual row

This commit is contained in:
jsc0218 2024-04-24 01:14:04 +00:00
parent cc3fd0e736
commit 7f6d640023
3 changed files with 8 additions and 4 deletions

View File

@@ -597,7 +597,8 @@ Pipe ReadFromMergeTree::readInOrder(
processor->addPartLevelToChunk(isQueryWithFinal()); processor->addPartLevelToChunk(isQueryWithFinal());
processor->addVirtualRowToChunk(need_virtual_row, part_with_ranges.data_part->getIndex()); processor->addVirtualRowToChunk(need_virtual_row, part_with_ranges.data_part->getIndex(),
part_with_ranges.ranges.front().begin);
auto source = std::make_shared<MergeTreeSource>(std::move(processor)); auto source = std::make_shared<MergeTreeSource>(std::move(processor));
if (set_rows_approx) if (set_rows_approx)

View File

@@ -155,7 +155,7 @@ ChunkAndProgress MergeTreeSelectProcessor::read()
if (j < index.size() && type_and_name.name == primary_key.column_names[j] && type_and_name.type == primary_key.data_types[j]) if (j < index.size() && type_and_name.name == primary_key.column_names[j] && type_and_name.type == primary_key.data_types[j])
{ {
auto column = current_column->cloneEmpty(); auto column = current_column->cloneEmpty();
column->insert((*index[j])[0]); // TODO: use the first range pk whose range might contain results column->insert((*index[j])[mark_range_begin]);
ordered_columns.push_back(std::move(column)); ordered_columns.push_back(std::move(column));
++j; ++j;
} }

View File

@@ -65,10 +65,11 @@ public:
void addPartLevelToChunk(bool add_part_level_) { add_part_level = add_part_level_; } void addPartLevelToChunk(bool add_part_level_) { add_part_level = add_part_level_; }
void addVirtualRowToChunk(bool add_virtual_row_, const Columns& index_) void addVirtualRowToChunk(bool add_virtual_row_, const Columns& index_, size_t mark_range_begin_)
{ {
add_virtual_row = add_virtual_row_; add_virtual_row = add_virtual_row_;
index = index_; index = index_;
mark_range_begin = mark_range_begin_;
} }
private: private:
@@ -108,8 +109,10 @@ private:
/// Should we add a virtual row as the single first chunk. /// Should we add a virtual row as the single first chunk.
/// Virtual row is useful for read-in-order optimization when multiple parts exist. /// Virtual row is useful for read-in-order optimization when multiple parts exist.
bool add_virtual_row = false; bool add_virtual_row = false;
/// PK index used in virtual row.
Columns index; Columns index;
/// The first range that might contain the candidate, used in virtual row.
size_t mark_range_begin;
LoggerPtr log = getLogger("MergeTreeSelectProcessor"); LoggerPtr log = getLogger("MergeTreeSelectProcessor");
std::atomic<bool> is_cancelled{false}; std::atomic<bool> is_cancelled{false};