From d80a21a44552ba1484a99bef5d0d95e6d5a09b10 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 26 Jun 2022 08:23:52 +0000 Subject: [PATCH 1/2] Distinct sorted: calculate column positions once in constructor - instead of calculating them on every chunk --- .../Transforms/DistinctSortedTransform.cpp | 29 +++++++++---------- .../Transforms/DistinctSortedTransform.h | 4 ++- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Processors/Transforms/DistinctSortedTransform.cpp b/src/Processors/Transforms/DistinctSortedTransform.cpp index 13d039ebcae..a4e5983de82 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedTransform.cpp @@ -13,10 +13,20 @@ DistinctSortedTransform::DistinctSortedTransform( : ISimpleTransform(header_, header_, true) , header(std::move(header_)) , description(std::move(sort_description)) - , columns_names(columns) + , column_names(columns) , limit_hint(limit_hint_) , set_size_limits(set_size_limits_) { + /// pre-calculate column positions to process during chunk transformation + const size_t num_columns = column_names.empty() ? header_.columns() : column_names.size(); + column_positions.reserve(num_columns); + for (size_t i = 0; i < num_columns; ++i) + { + auto pos = column_names.empty() ? i : header_.getPositionByName(column_names[i]); + const auto & col = header_.getByPosition(pos).column; + if (col && !isColumnConst(*col)) + column_positions.emplace_back(pos); + } } void DistinctSortedTransform::transform(Chunk & chunk) @@ -119,24 +129,13 @@ bool DistinctSortedTransform::buildFilter( ColumnRawPtrs DistinctSortedTransform::getKeyColumns(const Chunk & chunk) const { - size_t columns = columns_names.empty() ? chunk.getNumColumns() : columns_names.size(); - ColumnRawPtrs column_ptrs; - column_ptrs.reserve(columns); - - for (size_t i = 0; i < columns; ++i) + column_ptrs.reserve(column_positions.size()); + for (const auto pos : column_positions) { - auto pos = i; - if (!columns_names.empty()) - pos = input.getHeader().getPositionByName(columns_names[i]); - const auto & column = chunk.getColumns()[pos]; - - /// Ignore all constant columns. - if (!isColumnConst(*column)) - column_ptrs.emplace_back(column.get()); + column_ptrs.emplace_back(column.get()); } - return column_ptrs; } diff --git a/src/Processors/Transforms/DistinctSortedTransform.h b/src/Processors/Transforms/DistinctSortedTransform.h index 0530a6689e9..72acde0716b 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.h +++ b/src/Processors/Transforms/DistinctSortedTransform.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -57,7 +58,8 @@ private: }; PreviousChunk prev_chunk; - Names columns_names; + Names column_names; + ColumnNumbers column_positions; ClearableSetVariants data; Sizes key_sizes; UInt64 limit_hint; From c1840e798ceeec72e315503d635a2e53e6d890c7 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 28 Jun 2022 20:04:27 +0000 Subject: [PATCH 2/2] Fix: wrong header variable was used --- src/Processors/Transforms/DistinctSortedTransform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/Transforms/DistinctSortedTransform.cpp b/src/Processors/Transforms/DistinctSortedTransform.cpp index a4e5983de82..73e89c774ae 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedTransform.cpp @@ -17,13 +17,13 @@ DistinctSortedTransform::DistinctSortedTransform( , limit_hint(limit_hint_) , set_size_limits(set_size_limits_) { - /// pre-calculate column positions to process during chunk transformation - const size_t num_columns = column_names.empty() ? header_.columns() : column_names.size(); + /// pre-calculate column positions to use during chunk transformation + const size_t num_columns = column_names.empty() ? header.columns() : column_names.size(); column_positions.reserve(num_columns); for (size_t i = 0; i < num_columns; ++i) { - auto pos = column_names.empty() ? i : header_.getPositionByName(column_names[i]); - const auto & col = header_.getByPosition(pos).column; + auto pos = column_names.empty() ? i : header.getPositionByName(column_names[i]); + const auto & col = header.getByPosition(pos).column; if (col && !isColumnConst(*col)) column_positions.emplace_back(pos); }