fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization

This commit is contained in:
Anton Popov 2020-05-07 01:27:35 +03:00
parent 69dcd80e15
commit cbbd8e521c
5 changed files with 65 additions and 1 deletions

View File

@ -813,6 +813,14 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
return res;
}
/// Builds an expression that projects a block onto the columns currently present
/// in the header of `pipe`.
///
/// The returned actions take the header's names and types as input and contain a
/// single PROJECT action over the header's column names. Callers capture this
/// before appending further transforms to the pipe, so that the original column
/// set can be restored afterwards.
static ExpressionActionsPtr createProjection(const Pipe & pipe, const MergeTreeData & data)
{
    const auto & source_header = pipe.getHeader();

    auto actions = std::make_shared<ExpressionActions>(
        source_header.getNamesAndTypesList(),
        data.global_context);

    /// Keep only the columns that the header has right now.
    actions->add(ExpressionAction::project(source_header.getNames()));

    return actions;
}
Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
RangesInDataParts && parts,
size_t num_streams,
@ -999,13 +1007,19 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
sort_description.emplace_back(data.sorting_key_columns[j],
input_sorting_info->direction, 1);
/// Project input columns to drop the columns added by sorting_key_prefix_expr,
/// so that the same expression can be executed again later.
/// NOTE: This may lead to the expression being computed twice.
auto projection = createProjection(pipes.back(), data);
for (auto & pipe : pipes)
pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), sorting_key_prefix_expr));
auto merging_sorted = std::make_shared<MergingSortedTransform>(
pipes.back().getHeader(), pipes.size(), sort_description, max_block_size);
res.emplace_back(std::move(pipes), std::move(merging_sorted));
Pipe merged(std::move(pipes), std::move(merging_sorted));
merged.addSimpleTransform(std::make_shared<ExpressionTransform>(merged.getHeader(), projection));
res.emplace_back(std::move(merged));
}
else
res.emplace_back(std::move(pipes.front()));
@ -1051,6 +1065,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
use_uncompressed_cache = false;
Pipes pipes;
/// Project input columns to drop the columns added by sorting_key_expr,
/// so that the same expression can be executed again later.
/// NOTE: This may lead to the expression being computed twice.
ExpressionActionsPtr projection;
for (const auto & part : parts)
{
@ -1061,6 +1079,9 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
virt_columns, part.part_index_in_query);
Pipe pipe(std::move(source_processor));
if (!projection)
projection = createProjection(pipe, data);
pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), data.sorting_key_expr));
pipes.emplace_back(std::move(pipe));
}
@ -1133,6 +1154,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
if (merged_processor)
{
Pipe pipe(std::move(pipes), std::move(merged_processor));
pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), projection));
pipes = Pipes();
pipes.emplace_back(std::move(pipe));
}

View File

@ -0,0 +1,10 @@
2020-05-05 01:00:00 0
2020-05-05 01:00:00 1
2020-05-05 01:00:00 2
2020-05-05 01:00:00 3
2020-05-05 01:00:00 4
999999
999999
999999
999998
999998

View File

@ -0,0 +1,19 @@
-- Regression test: ORDER BY on a table whose sorting key is wrapped into a function.
-- Each table is filled by three separate INSERTs (presumably so that it holds
-- several data parts that have to be merged while reading -- TODO confirm).

-- Case 1: the sorting key is toDate(d) and the query orders by toDate(d), ui.
DROP TABLE IF EXISTS pk_func;
CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = MergeTree ORDER BY toDate(d);
INSERT INTO pk_func SELECT '2020-05-05 01:00:00', number FROM numbers(1000000);
INSERT INTO pk_func SELECT '2020-05-06 01:00:00', number FROM numbers(1000000);
INSERT INTO pk_func SELECT '2020-05-07 01:00:00', number FROM numbers(1000000);
-- Expected: the five rows of 2020-05-05 with ui = 0..4.
SELECT * FROM pk_func ORDER BY toDate(d), ui LIMIT 5;
DROP TABLE pk_func;
-- Case 2: the sorting key is the negated column, so the largest i comes first.
CREATE TABLE pk_func(i UInt32) ENGINE = MergeTree ORDER BY -i;
INSERT INTO pk_func SELECT number FROM numbers(1000000);
INSERT INTO pk_func SELECT number FROM numbers(1000000);
INSERT INTO pk_func SELECT number FROM numbers(1000000);
-- Every value was inserted three times, so the expected output is
-- 999999 three times followed by 999998 twice.
SELECT * FROM pk_func ORDER BY -i LIMIT 5;
DROP TABLE pk_func;

View File

@ -0,0 +1,3 @@
2020-05-05 704982704
2020-05-06 704982704
2020-05-07 704982704

View File

@ -0,0 +1,10 @@
-- Regression test: SELECT ... FINAL on a table whose sorting key is wrapped into
-- a function (toDate(d)), with toDate(d) also requested in the SELECT list.
DROP TABLE IF EXISTS pk_func;
CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = SummingMergeTree ORDER BY toDate(d);
INSERT INTO pk_func SELECT '2020-05-05 01:00:00', number FROM numbers(100000);
INSERT INTO pk_func SELECT '2020-05-06 01:00:00', number FROM numbers(100000);
INSERT INTO pk_func SELECT '2020-05-07 01:00:00', number FROM numbers(100000);
-- Expected: one row per date. ui is the sum 0 + 1 + ... + 99999 = 4999950000,
-- which wraps around in UInt32 to 704982704.
-- The explicit ORDER BY makes the row order deterministic: without it the order
-- of the result is not guaranteed, while the reference output lists the dates
-- in ascending order.
SELECT toDate(d), ui FROM pk_func FINAL ORDER BY d;
DROP TABLE pk_func;