diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index 1849bd0e9a5..eb63764947c 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -45,8 +45,31 @@ public: return std::make_shared(); } + ColumnPtr executeSingleGroupingSet(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + auto grouping_set_map_column = checkAndGetColumnConst(arguments[0].column.get()); + auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); + + auto aggregation_keys_number = (*grouping_set_map_column)[0].get(); + + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + auto indexes = (*argument_keys_column)[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (index.get() < aggregation_keys_number ? 1 : 0); + + result->insert(Field(value)); + } + return result; + } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (arguments.size() == 2) + return executeSingleGroupingSet(arguments, input_rows_count); + auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 40a5f055243..5bececb70ae 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -477,7 +477,8 @@ ActionsMatcher::Data::Data( bool no_subqueries_, bool no_makeset_, bool only_consts_, - bool create_source_for_in_) + bool create_source_for_in_, + bool has_grouping_set_column_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -490,6 +491,7 @@ ActionsMatcher::Data::Data( , no_makeset(no_makeset_) , only_consts(only_consts_) , create_source_for_in(create_source_for_in_) + , has_grouping_set_column(has_grouping_set_column_) , visit_depth(0) , actions_stack(std::move(actions_dag), context_) , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) @@ -842,19 +844,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { ColumnWithTypeAndName column; column.name = "__grouping_set_map"; - size_t map_size = data.aggregation_keys.size() + 1; - column.type = std::make_shared(std::make_shared(map_size)); - Array maps_per_set; - for (auto & grouping_set : data.grouping_set_keys) + if (data.has_grouping_set_column) { - std::string key_map(map_size, '0'); - for (auto index : grouping_set) - key_map[index] = '1'; - maps_per_set.push_back(key_map); + size_t map_size = data.aggregation_keys.size() + 1; + column.type = std::make_shared(std::make_shared(map_size)); + Array maps_per_set; + for (auto & grouping_set : data.grouping_set_keys) + { + std::string key_map(map_size, '0'); + for (auto index : grouping_set) + key_map[index] = '1'; + maps_per_set.push_back(key_map); + } + auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); + grouping_set_map_column->insert(maps_per_set); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + } + else + { + column.type = std::make_shared(); + auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); } - auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); - grouping_set_map_column->insert(maps_per_set); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); data.addColumn(column); } @@ -875,11 +886,22 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addColumn(column); } - data.addFunction( - FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set", "__grouping_set_map", arguments_column_name }, - column_name - ); + if (data.has_grouping_set_column) + { + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set", "__grouping_set_map", arguments_column_name }, + column_name + ); + } + else + { + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set_map", arguments_column_name }, + column_name + ); + } return; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index b7d2905ac73..3f7f6b5b127 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -97,6 +97,7 @@ public: bool no_makeset; bool only_consts; bool create_source_for_in; + bool has_grouping_set_column; size_t visit_depth; ScopeStack actions_stack; @@ -120,7 +121,8 @@ public: bool no_subqueries_, bool no_makeset_, bool only_consts_, - bool create_source_for_in_); + bool create_source_for_in_, + bool has_grouping_set_column_); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1a2cb4ace1a..9c74693e6a2 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -333,8 +333,10 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column /// With set number, which is used as an additional key at the stage of merging aggregating data. - if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + bool process_grouping_sets = select_query->group_by_with_grouping_sets && group_asts.size() > 1; + if (process_grouping_sets) aggregated_columns.emplace_back("__grouping_set", std::make_shared()); + need_grouping_set_column = select_query->group_by_with_rollup || select_query->group_by_with_cube || process_grouping_sets; for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { @@ -452,8 +454,12 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - // if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) - // aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); + if (!select_query->group_by_with_grouping_sets) + { + auto & list = aggregation_keys_indexes_list.emplace_back(); + for (size_t i = 0; i < aggregation_keys.size(); ++i) + list.push_back(i); + } if (group_asts.empty()) { @@ -598,7 +604,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ no_makeset_for_subqueries, false /* no_makeset */, only_consts, - !isRemoteStorage() /* create_source_for_in */); + !isRemoteStorage() /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -620,7 +627,8 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */, true /* no_makeset */, only_consts, - !isRemoteStorage() /* create_source_for_in */); + !isRemoteStorage() /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -643,7 +651,8 @@ void ExpressionAnalyzer::getRootActionsForHaving( no_makeset_for_subqueries, false /* no_makeset */, only_consts, - true /* create_source_for_in */); + true /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 1200091efef..5db4fda0fcf 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -77,6 +77,8 @@ struct ExpressionAnalyzerData /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. TemporaryTablesMapping external_tables; + + bool need_grouping_set_column = false; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5f165f9d535..6bfadc66352 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -757,9 +756,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl() res.insert({nullptr, type, aggregate.column_name}); } - // if (analysis_result.use_grouping_set_key) - // res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); - return res; } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 704aeeeea7c..b3f53de3c99 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -89,6 +89,8 @@ public: bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool needGroupingSetColumn() const noexcept { return group_by_with_cube || group_by_with_rollup || group_by_with_grouping_sets; } + ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 9c2b5a44914..b830c7899bb 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -50,10 +50,6 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa for (auto & col : block) res.insert(std::move(col)); - // auto map_column = ColumnFixedString::create(keys_size + 1); - // map_column->resize(rows); - // res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); - return res; } @@ -243,22 +239,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); } - // { - // std::string grouping_map; - // grouping_map.reserve(params.keys_size + 1); - // std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); - // for (auto key : params.keys) - // grouping_map += key_set.contains(key) ? '1' : '0'; - // grouping_map += '0'; - // auto nested_column = ColumnFixedString::create(params.keys_size + 1); - // nested_column->insertString(grouping_map); - // auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); - // const auto * grouping_map_node = &dag->addColumn( - // {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); - // grouping_map_node = &dag->materializeNode(*grouping_map_node); - // index.push_back(grouping_map_node); - // } - dag->getIndex().swap(index); auto expression = std::make_shared(dag, settings.getActionsSettings()); auto transform = std::make_shared(header, expression); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ef25612f63e..c1824206b60 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, false); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference new file mode 100644 index 00000000000..38578d6ad1d --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -0,0 +1,20 @@ +0 1 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +0 1 1 +1 1 1 +2 1 1 +3 1 1 +4 1 1 +5 1 1 +6 1 1 +7 1 1 +8 1 1 +9 1 1 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql new file mode 100644 index 00000000000..5b12c34adac --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -0,0 +1,18 @@ +SELECT + number, + grouping(number, number % 2, number % 3) = 6 +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; + +SELECT + number, + grouping(number), + GROUPING(number % 2) +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number;