use a better range begin in virtual row

This commit is contained in:
jsc0218 2024-04-24 01:14:04 +00:00
parent cc3fd0e736
commit 7f6d640023
3 changed files with 8 additions and 4 deletions

View File

@ -597,7 +597,8 @@ Pipe ReadFromMergeTree::readInOrder(
processor->addPartLevelToChunk(isQueryWithFinal());
processor->addVirtualRowToChunk(need_virtual_row, part_with_ranges.data_part->getIndex());
processor->addVirtualRowToChunk(need_virtual_row, part_with_ranges.data_part->getIndex(),
part_with_ranges.ranges.front().begin);
auto source = std::make_shared<MergeTreeSource>(std::move(processor));
if (set_rows_approx)

View File

@ -155,7 +155,7 @@ ChunkAndProgress MergeTreeSelectProcessor::read()
if (j < index.size() && type_and_name.name == primary_key.column_names[j] && type_and_name.type == primary_key.data_types[j])
{
auto column = current_column->cloneEmpty();
column->insert((*index[j])[0]); // TODO: use the first range pk whose range might contain results
column->insert((*index[j])[mark_range_begin]);
ordered_columns.push_back(std::move(column));
++j;
}

View File

@ -65,10 +65,11 @@ public:
/// Stores the flag that controls whether the part level is added to chunks.
/// Only the flag is set here; the code that acts on it is not part of this setter.
void addPartLevelToChunk(bool add_part_level_) { add_part_level = add_part_level_; }
/// Configures the virtual row: stores the enable flag, a copy of the index columns,
/// and (in the three-argument form) the mark position at which the index columns are read.
/// The caller in ReadFromMergeTree::readInOrder passes the begin mark of the part's first range,
/// and MergeTreeSelectProcessor::read uses it as the row position in each index column.
void addVirtualRowToChunk(bool add_virtual_row_, const Columns& index_)
void addVirtualRowToChunk(bool add_virtual_row_, const Columns& index_, size_t mark_range_begin_)
{
add_virtual_row = add_virtual_row_;
index = index_;
mark_range_begin = mark_range_begin_;
}
private:
@ -108,8 +109,10 @@ private:
/// Should we add a virtual row as the single first chunk.
/// Virtual row is useful for read-in-order optimization when multiple parts exist.
bool add_virtual_row = false;
/// PK index used in virtual row.
Columns index;
/// Begin mark of the first range that might contain candidate rows; used as the row position
/// read from the PK index when building the virtual row.
/// Defaults to 0 (the first index entry) so the member is never left indeterminate.
size_t mark_range_begin = 0;
LoggerPtr log = getLogger("MergeTreeSelectProcessor");
std::atomic<bool> is_cancelled{false};