From 48eb9954613eae87316243e012848dba4ff2ded3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 6 Aug 2021 17:23:50 +0300 Subject: [PATCH] Fix unknown column bug in sampling. --- .../QueryPlan/ReadFromMergeTree.cpp | 8 ++++---- ..._sampling_and_unknown_column_bug.reference | 2 ++ .../02002_sampling_and_unknown_column_bug.sql | 20 +++++++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.reference create mode 100644 tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.sql diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f8c12449c7e..379f44cdb5d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -995,21 +995,18 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build Block cur_header = result_projection ? result_projection->getResultColumns() : pipe.getHeader(); - auto append_actions = [&result_projection, &cur_header](ActionsDAGPtr actions) + auto append_actions = [&result_projection](ActionsDAGPtr actions) { if (!result_projection) result_projection = std::move(actions); else result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); - - cur_header = result_projection->getResultColumns(); }; /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. 
if (sample_factor_column_queried) { ColumnWithTypeAndName column; - column.name = "_sample_factor"; column.type = std::make_shared<DataTypeFloat64>(); column.column = column.type->createColumnConst(0, Field(result.sampling.used_sample_factor)); @@ -1017,6 +1014,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build append_actions(std::move(adding_column)); } + if (result_projection) + cur_header = result_projection->updateHeader(cur_header); + /// Extra columns may be returned (for example, if sampling is used). /// Convert pipe to step header structure. if (!isCompatibleHeader(cur_header, getOutputStream().header)) diff --git a/tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.reference b/tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.reference new file mode 100644 index 00000000000..9315e86b328 --- /dev/null +++ b/tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.reference @@ -0,0 +1,2 @@ +1 +1 1 1 diff --git a/tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.sql b/tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.sql new file mode 100644 index 00000000000..838d7a5526b --- /dev/null +++ b/tests/queries/0_stateless/02002_sampling_and_unknown_column_bug.sql @@ -0,0 +1,20 @@ +drop table if exists sessions; +CREATE TABLE sessions +( + `user_id` UInt64 +) +ENGINE = MergeTree +ORDER BY user_id +SAMPLE BY user_id; + +insert into sessions values(1); + +SELECT + sum(user_id * _sample_factor) +FROM sessions +SAMPLE 10000000; + +SELECT + uniq(user_id) a, min(_sample_factor) x, a*x +FROM sessions +SAMPLE 10000000;