From 4af435bddae5e3e8c0382d2ef6f5c1fedf91ba3d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 28 Jul 2022 21:22:06 +0000 Subject: [PATCH 1/2] Fix: handle all const columns case correctly --- .../Transforms/DistinctSortedTransform.cpp | 29 +++++++++++++++++-- .../Transforms/DistinctSortedTransform.h | 1 + ...7_distinct_in_order_optimization.reference | 10 +++++++ .../02317_distinct_in_order_optimization.sql | 8 +++++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/DistinctSortedTransform.cpp b/src/Processors/Transforms/DistinctSortedTransform.cpp index 3762504fda5..a5f058aaa8e 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedTransform.cpp @@ -8,6 +8,20 @@ namespace ErrorCodes extern const int SET_SIZE_LIMIT_EXCEEDED; } +static void handleAllColumnsConst(Chunk & chunk) +{ + const size_t rows = chunk.getNumRows(); + IColumn::Filter filter(rows); + + Chunk res_chunk; + std::fill(filter.begin(), filter.end(), 0); + filter[0] = 1; + for (const auto & column : chunk.getColumns()) + res_chunk.addColumn(column->filter(filter, -1)); + + chunk = std::move(res_chunk); +} + DistinctSortedTransform::DistinctSortedTransform( Block header_, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns) : ISimpleTransform(header_, header_, true) @@ -23,9 +37,12 @@ DistinctSortedTransform::DistinctSortedTransform( for (size_t i = 0; i < num_columns; ++i) { auto pos = column_names.empty() ? 
i : header.getPositionByName(column_names[i]); - const auto & col = header.getByPosition(pos).column; - if (col && !isColumnConst(*col)) + const auto & column = header.getByPosition(pos).column; + if (column && !isColumnConst(*column)) + { column_positions.emplace_back(pos); + all_columns_const = false; + } } column_ptrs.reserve(column_positions.size()); @@ -52,6 +69,14 @@ void DistinctSortedTransform::transform(Chunk & chunk) if (unlikely(!chunk.hasRows())) return; + /// special case - all columns are constant + if (unlikely(all_columns_const)) + { + handleAllColumnsConst(chunk); + stopReading(); + return; + } + /// get DISTINCT columns from chunk column_ptrs.clear(); for (const auto pos : column_positions) diff --git a/src/Processors/Transforms/DistinctSortedTransform.h b/src/Processors/Transforms/DistinctSortedTransform.h index 2fe40408683..440754ce6a4 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.h +++ b/src/Processors/Transforms/DistinctSortedTransform.h @@ -66,6 +66,7 @@ private: /// Restrictions on the maximum size of the output data. 
SizeLimits set_size_limits; + bool all_columns_const = true; }; } diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference index a5ae3759d5d..79927cf8884 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference @@ -77,4 +77,14 @@ 2 2 1 1 0 0 +-- distinct with constants columns +-- { echoOn } +select distinct 1 as a, 2 as b from distinct_in_order; +1 2 +select distinct 1 as a, 2 as b from distinct_in_order order by a; +1 2 +select distinct 1 as a, 2 as b from distinct_in_order order by a, b; +1 2 +select distinct x, y from (select 1 as x, 2 as y from distinct_in_order order by x) order by x; +1 2 -- check that distinct in order has the same result as ordinary distinct diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql index f1de07e3db2..945c9625e99 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql @@ -43,6 +43,14 @@ select distinct b,c from distinct_in_order order by c; select '-- distinct with non-key prefix and non-sorted column, order by non-sorted desc'; select distinct b,c from distinct_in_order order by c desc; +select '-- distinct with constants columns'; +-- { echoOn } +select distinct 1 as a, 2 as b from distinct_in_order; +select distinct 1 as a, 2 as b from distinct_in_order order by a; +select distinct 1 as a, 2 as b from distinct_in_order order by a, b; +select distinct x, y from (select 1 as x, 2 as y from distinct_in_order order by x) order by x; +-- { echoOff } + drop table if exists distinct_in_order sync; select '-- check that distinct in order has the same result as ordinary distinct'; From 11d5f1ea9a2ec52cfd6c0c3cbe8ad22021bdc993 Mon Sep 17 
00:00:00 2001 From: Igor Nikonov Date: Fri, 29 Jul 2022 10:06:36 +0000 Subject: [PATCH 2/2] More tests --- ...7_distinct_in_order_optimization.reference | 24 +++++++++++++++---- .../02317_distinct_in_order_optimization.sql | 12 ++++++---- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference index 79927cf8884..efc9e28bcce 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference @@ -79,12 +79,28 @@ 0 0 -- distinct with constants columns -- { echoOn } -select distinct 1 as a, 2 as b from distinct_in_order; +select distinct 1 as x, 2 as y from distinct_in_order; 1 2 -select distinct 1 as a, 2 as b from distinct_in_order order by a; +select distinct 1 as x, 2 as y from distinct_in_order order by x; 1 2 -select distinct 1 as a, 2 as b from distinct_in_order order by a, b; +select distinct 1 as x, 2 as y from distinct_in_order order by x, y; 1 2 -select distinct x, y from (select 1 as x, 2 as y from distinct_in_order order by x) order by x; +select distinct a, 1 as x from distinct_in_order order by x; +0 1 +select distinct a, 1 as x, 2 as y from distinct_in_order order by a; +0 1 2 +select distinct a, b, 1 as x, 2 as y from distinct_in_order order by a; +0 0 1 2 +0 1 1 2 +0 2 1 2 +0 3 1 2 +0 4 1 2 +select distinct x, y from (select 1 as x, 2 as y from distinct_in_order order by x) order by y; 1 2 +select distinct a, b, x, y from (select a, b, 1 as x, 2 as y from distinct_in_order order by a) order by b; +0 0 1 2 +0 1 1 2 +0 2 1 2 +0 3 1 2 +0 4 1 2 -- check that distinct in order has the same result as ordinary distinct diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql index 945c9625e99..7a70e2ef873 100644 --- 
a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql @@ -45,10 +45,14 @@ select distinct b,c from distinct_in_order order by c desc; select '-- distinct with constants columns'; -- { echoOn } -select distinct 1 as a, 2 as b from distinct_in_order; -select distinct 1 as a, 2 as b from distinct_in_order order by a; -select distinct 1 as a, 2 as b from distinct_in_order order by a, b; -select distinct x, y from (select 1 as x, 2 as y from distinct_in_order order by x) order by x; +select distinct 1 as x, 2 as y from distinct_in_order; +select distinct 1 as x, 2 as y from distinct_in_order order by x; +select distinct 1 as x, 2 as y from distinct_in_order order by x, y; +select distinct a, 1 as x from distinct_in_order order by x; +select distinct a, 1 as x, 2 as y from distinct_in_order order by a; +select distinct a, b, 1 as x, 2 as y from distinct_in_order order by a; +select distinct x, y from (select 1 as x, 2 as y from distinct_in_order order by x) order by y; +select distinct a, b, x, y from (select a, b, 1 as x, 2 as y from distinct_in_order order by a) order by b; -- { echoOff } drop table if exists distinct_in_order sync;