From 83e9e5d0e59bfaeacf6353543a999408812fc247 Mon Sep 17 00:00:00 2001 From: fanzhou Date: Mon, 6 Sep 2021 10:18:42 +0800 Subject: [PATCH] some changes --- src/Core/ColumnNumbers.h | 2 +- src/Core/NamesAndTypes.h | 2 +- src/Interpreters/Aggregator.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.h | 4 +- src/Interpreters/InterpreterSelectQuery.cpp | 79 ++++++------------- .../Transforms/GroupingSetsTransform.h | 2 +- src/QueryPipeline/Pipe.cpp | 4 +- .../01883_with_grouping_sets.reference | 7 +- 9 files changed, 40 insertions(+), 66 deletions(-) diff --git a/src/Core/ColumnNumbers.h b/src/Core/ColumnNumbers.h index 82868ef3cd3..29b4c49dc83 100644 --- a/src/Core/ColumnNumbers.h +++ b/src/Core/ColumnNumbers.h @@ -8,6 +8,6 @@ namespace DB { using ColumnNumbers = std::vector; -using ColumnNumbersTwoDimension = std::vector; +using ColumnNumbersList = std::vector; } diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index dacd5a459dc..2719017a726 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -109,7 +109,7 @@ public: std::optional tryGetByName(const std::string & name) const; }; -using TwoDimensionNamesAndTypesList = std::list; +using NamesAndTypesLists = std::vector; } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index b9dca0b09ae..e8635c4e31a 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -879,7 +879,7 @@ public: /// What to count. ColumnNumbers keys; - const ColumnNumbersTwoDimension keys_vector; + const ColumnNumbersList keys_vector; const AggregateDescriptions aggregates; size_t keys_size; const size_t aggregates_size; @@ -943,7 +943,7 @@ public: Params( const Block & src_header_, const ColumnNumbers & keys_, - const ColumnNumbersTwoDimension & keys_vector_, const AggregateDescriptions & aggregates_, + const ColumnNumbersList & keys_vector_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_, size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_, size_t max_bytes_before_external_group_by_, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index cd2e00c832b..1834479df69 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -395,7 +395,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - aggregation_keys_list.push_back(grouping_set_list); + aggregation_keys_list.push_back(std::move(grouping_set_list)); } else { diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 7598efa40c6..19054e795a6 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -64,8 +64,8 @@ struct ExpressionAnalyzerData bool has_aggregation = false; NamesAndTypesList aggregation_keys; + NamesAndTypesLists aggregation_keys_list; bool has_const_aggregation_keys = false; - TwoDimensionNamesAndTypesList aggregation_keys_list; AggregateDescriptions aggregate_descriptions; WindowDescriptions window_descriptions; @@ -324,7 +324,7 @@ public: const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; } bool hasConstAggregationKeys() const { return has_const_aggregation_keys; } - const TwoDimensionNamesAndTypesList & aggregationKeysList() const { return aggregation_keys_list; } + const NamesAndTypesLists & aggregationKeysList() const { return aggregation_keys_list; } const AggregateDescriptions & aggregates() const { return aggregate_descriptions; } const PreparedSets & getPreparedSets() const { return prepared_sets; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 995f25816bb..970a1362699 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2062,61 +2062,35 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac return; const auto & header_before_aggregation = query_plan.getCurrentDataStream().header; - ColumnNumbers keys; - ColumnNumbers all_keys; - ColumnNumbersTwoDimension keys_vector; - auto & query = getSelectQuery(); - if (query.group_by_with_grouping_sets) - { - std::set keys_set; - for (const auto & aggregation_keys : query_analyzer->aggregationKeysList()) - { - keys.clear(); - for (const auto & key : aggregation_keys) - { - size_t key_name_pos = header_before_aggregation.getPositionByName(key.name); - if (!keys_set.count(key_name_pos)) - { - keys_set.insert(key_name_pos); - } - keys.push_back(key_name_pos); - LOG_DEBUG( - log, - "execute aggregation with grouping sets add key with name {} and number {}", - key.name, - header_before_aggregation.getPositionByName(key.name)); - } - keys_vector.push_back(keys); - LOG_DEBUG( - log, - "execute aggregation with grouping sets add keys set of size {}", - keys.size()); - } - all_keys.assign(keys_set.begin(), keys_set.end()); - LOG_DEBUG( - log, - "execute aggregation with grouping sets add all keys of size {}", - all_keys.size()); - } - else - { - for (const auto & key : query_analyzer->aggregationKeys()) - { - keys.push_back(header_before_aggregation.getPositionByName(key.name)); - LOG_DEBUG(log, "execute aggregation without grouping sets pushed back key with name {} and number {}", key.name, header_before_aggregation.getPositionByName(key.name)); - } - } AggregateDescriptions aggregates = query_analyzer->aggregates(); for (auto & descr : aggregates) if (descr.arguments.empty()) for (const auto & name : descr.argument_names) descr.arguments.push_back(header_before_aggregation.getPositionByName(name)); - const Settings & settings = context->getSettingsRef(); std::shared_ptr params_ptr; + + auto & query = getSelectQuery(); if (query.group_by_with_grouping_sets) { + ColumnNumbers keys; + ColumnNumbers all_keys; + ColumnNumbersList keys_vector; + std::unordered_set keys_set; + for (const auto & aggregation_keys : query_analyzer->aggregationKeysList()) + { + keys.clear(); + for (const auto & key : aggregation_keys) + { + size_t key_name_pos = header_before_aggregation.getPositionByName(key.name); + keys_set.insert(key_name_pos); + keys.push_back(key_name_pos); + } + keys_vector.push_back(keys); + } + all_keys.assign(keys_set.begin(), keys_set.end()); + params_ptr = std::make_shared( header_before_aggregation, all_keys, @@ -2139,6 +2113,10 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac } else { + ColumnNumbers keys; + for (const auto & key : query_analyzer->aggregationKeys()) + keys.push_back(header_before_aggregation.getPositionByName(key.name)); + params_ptr = std::make_shared( header_before_aggregation, keys, @@ -2160,13 +2138,10 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac } SortDescription group_by_sort_description; - if (group_by_info && settings.optimize_aggregation_in_order && !query.group_by_with_grouping_sets) { + if (group_by_info && settings.optimize_aggregation_in_order && !query.group_by_with_grouping_sets) group_by_sort_description = getSortDescriptionFromGroupBy(query); - LOG_DEBUG(log, "execute aggregation without grouping sets got group_by_sort_description"); - } else { + else group_by_info = nullptr; - LOG_DEBUG(log, "execute aggregation didn't get group_by_sort_description"); - } auto merge_threads = max_streams; auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads @@ -2175,7 +2150,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac bool storage_has_evenly_distributed_read = storage && storage->hasEvenlyDistributedRead(); - LOG_DEBUG(log, "execute aggregation header structure before step: {}", query_plan.getCurrentDataStream().header.dumpStructure()); auto aggregating_step = std::make_unique( query_plan.getCurrentDataStream(), *params_ptr, @@ -2187,7 +2161,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac storage_has_evenly_distributed_read, std::move(group_by_info), std::move(group_by_sort_description)); - LOG_DEBUG(log, "execute aggregation header structure after step: {}", aggregating_step->getOutputStream().header.dumpStructure()); query_plan.addStep(std::move(aggregating_step)); } @@ -2245,7 +2218,7 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific const auto & header_before_transform = query_plan.getCurrentDataStream().header; ColumnNumbers keys; ColumnNumbers all_keys; - ColumnNumbersTwoDimension keys_vector; + ColumnNumbersList keys_vector; auto & query = getSelectQuery(); if (query.group_by_with_grouping_sets) { diff --git a/src/Processors/Transforms/GroupingSetsTransform.h b/src/Processors/Transforms/GroupingSetsTransform.h index 4019b2ffa52..f9c688b2cc8 100644 --- a/src/Processors/Transforms/GroupingSetsTransform.h +++ b/src/Processors/Transforms/GroupingSetsTransform.h @@ -18,7 +18,7 @@ protected: private: AggregatingTransformParamsPtr params; ColumnNumbers keys; - ColumnNumbersTwoDimension keys_vector; + ColumnNumbersList keys_vector; Chunks consumed_chunks; Chunk grouping_sets_chunk; diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 184cad18c17..dd4a2cb6c77 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -464,9 +464,9 @@ void Pipe::addParallelTransforms(Processors transforms) "but " + std::to_string(output_ports.size()) + " expected", ErrorCodes::LOGICAL_ERROR); size_t next_output = 0; - for (auto * input : inputs) + for (size_t i = 0; i < inputs.size(); ++i) { - connect(*output_ports[next_output], *input); + connect(*output_ports[next_output], *inputs[i]); ++next_output; } diff --git a/tests/queries/0_stateless/01883_with_grouping_sets.reference b/tests/queries/0_stateless/01883_with_grouping_sets.reference index a1d55448f3f..258fa6f5abd 100644 --- a/tests/queries/0_stateless/01883_with_grouping_sets.reference +++ b/tests/queries/0_stateless/01883_with_grouping_sets.reference @@ -18,6 +18,7 @@ 2 3 0 5200 2 4 0 4800 2 5 0 5400 + 0 0 1 1 4500 0 0 2 2 4600 0 0 3 3 4700 @@ -38,6 +39,7 @@ 2 3 0 0 5200 2 4 0 0 4800 2 5 0 0 5400 + 1 0 24500 1 1 4500 1 3 4700 @@ -50,8 +52,8 @@ 2 6 5000 2 8 5200 2 10 5400 - 0 0 49500 + 1 0 24500 1 1 4500 1 3 4700 @@ -64,5 +66,4 @@ 2 6 5000 2 8 5200 2 10 5400 - -0 0 49500 \ No newline at end of file +0 0 49500