From c5b40a9c91fc40bf929e54ef23af2bb33ee8c928 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 12 May 2022 16:39:50 +0000 Subject: [PATCH 01/24] WIP on GROUPING function --- src/Core/NamesAndTypes.cpp | 14 ++++ src/Core/NamesAndTypes.h | 2 + .../registerFunctionsMiscellaneous.cpp | 2 + src/Interpreters/ActionsVisitor.cpp | 50 +++++++++++- src/Interpreters/ActionsVisitor.h | 3 + src/Interpreters/ExpressionAnalyzer.cpp | 7 ++ src/Interpreters/InterpreterSelectQuery.cpp | 4 + src/Parsers/ExpressionElementParsers.cpp | 16 ++++ src/Processors/QueryPlan/AggregatingStep.cpp | 28 ++++++- src/Storages/VirtualColumnUtils.cpp | 2 +- .../02293_grouping_function.reference | 81 +++++++++++++++++++ .../0_stateless/02293_grouping_function.sql | 79 ++++++++++++++++++ 12 files changed, 280 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02293_grouping_function.reference create mode 100644 tests/queries/0_stateless/02293_grouping_function.sql diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index bd24a9e82bd..72768ce23fb 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -214,4 +215,17 @@ std::optional NamesAndTypesList::tryGetByName(const std::string } return {}; } + +size_t NamesAndTypesList::getPosByName(const std::string &name) const noexcept +{ + size_t pos = 0; + for (const NameAndTypePair & column : *this) + { + if (column.name == name) + break; + ++pos; + } + return pos; +} + } diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index 2719017a726..c7a51f51816 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -107,6 +107,8 @@ public: /// Try to get column by name, return empty optional if column not found std::optional tryGetByName(const std::string & name) const; + + size_t getPosByName(const std::string & name) const noexcept; }; using NamesAndTypesLists = std::vector; diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 9cd9c70da16..9fe1fa69b5e 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -83,6 +83,7 @@ void registerFunctionZooKeeperSessionUptime(FunctionFactory &); void registerFunctionGetOSKernelVersion(FunctionFactory &); void registerFunctionGetTypeSerializationStreams(FunctionFactory &); void registerFunctionFlattenTuple(FunctionFactory &); +void registerFunctionGrouping(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -172,6 +173,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionGetOSKernelVersion(factory); registerFunctionGetTypeSerializationStreams(factory); registerFunctionFlattenTuple(factory); + registerFunctionGrouping(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index c57b85951bc..b7efbc97cc9 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include +#include #include #include @@ -12,6 +15,7 @@ #include #include #include +#include #include #include @@ -459,14 +463,23 @@ public: }; ActionsMatcher::Data::Data( - ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, ActionsDAGPtr actions_dag, - PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, - bool no_subqueries_, bool no_makeset_, bool only_consts_, bool create_source_for_in_) + ContextPtr context_, + SizeLimits set_size_limit_, + size_t subquery_depth_, + const NamesAndTypesList & source_columns_, + const NamesAndTypesList & aggregation_keys_, + ActionsDAGPtr actions_dag, + PreparedSets & prepared_sets_, + SubqueriesForSets & subqueries_for_sets_, + bool no_subqueries_, + bool no_makeset_, + bool only_consts_, + bool create_source_for_in_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) , source_columns(source_columns_) + , aggregation_keys(aggregation_keys_) , prepared_sets(prepared_sets_) , subqueries_for_sets(subqueries_for_sets_) , no_subqueries(no_subqueries_) @@ -817,6 +830,35 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & return; } + if (node.name == "grouping") + { + auto arguments_column_name = data.getUniqueName("__grouping_args"); + { + ColumnWithTypeAndName column; + column.name = arguments_column_name; + column.type = std::make_shared(std::make_shared()); + Array arguments_to_keys_map; + for (auto const & arg : node.arguments->children) + { + size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); + arguments_to_keys_map.push_back(pos); + } + auto arguments_column = ColumnArray::create(ColumnUInt64::create()); + arguments_column->insert(Field{arguments_to_keys_map}); + + column.column = ColumnConst::create(ColumnPtr(std::move(arguments_column)), 1); + + data.addColumn(column); + } + + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set_map", arguments_column_name }, + column_name + ); + return; + } + SetPtr prepared_set; if (checkFunctionIsInOrGlobalInOperator(node)) { diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index d1558cb961c..313eae9fc8d 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -87,6 +88,7 @@ public: SizeLimits set_size_limit; size_t subquery_depth; const NamesAndTypesList & source_columns; + const NamesAndTypesList & aggregation_keys; PreparedSets & prepared_sets; SubqueriesForSets & subqueries_for_sets; bool no_subqueries; @@ -108,6 +110,7 @@ public: SizeLimits set_size_limit_, size_t subquery_depth_, const NamesAndTypesList & source_columns_, + const NamesAndTypesList & aggregation_keys_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index e7325363c08..8c3ea878718 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -47,6 +47,7 @@ #include #include +#include #include #include @@ -442,6 +443,9 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } + if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); + if (group_asts.empty()) { select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); @@ -577,6 +581,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ settings.size_limits_for_set, subquery_depth, sourceColumns(), + aggregation_keys, std::move(actions), prepared_sets, subqueries_for_sets, @@ -597,6 +602,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP settings.size_limits_for_set, subquery_depth, sourceColumns(), + aggregation_keys, std::move(actions), prepared_sets, subqueries_for_sets, @@ -618,6 +624,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( settings.size_limits_for_set, subquery_depth, sourceColumns(), + aggregation_keys, std::move(actions), prepared_sets, subqueries_for_sets, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bfadc66352..c8e04777574 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -756,6 +757,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl() res.insert({nullptr, type, aggregate.column_name}); } + if (analysis_result.use_grouping_set_key) + res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); + return res; } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 9150fee3bde..021d4356f41 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -801,6 +801,20 @@ namespace node = makeASTFunction("exists", subquery); return true; } + + bool parseGrouping(IParser::Pos & pos, ASTPtr & node, Expected & expected) + { + ASTPtr expr_list; + if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) + return false; + + auto res = std::make_shared(); + res->name = "grouping"; + res->arguments = expr_list; + res->children.push_back(res->arguments); + node = std::move(res); + return true; + } } @@ -886,6 +900,8 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") parsed_special_function = parseDateDiff(pos, node, expected); + else if (function_name_lowercase == "grouping") + parsed_special_function = parseGrouping(pos, node, expected); if (parsed_special_function.has_value()) return parsed_special_function.value() && ParserToken(TokenType::ClosingRoundBracket).ignore(pos); diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index d7d62d07d92..87588facff2 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -12,7 +12,9 @@ #include #include #include +#include #include +#include namespace DB { @@ -33,7 +35,7 @@ static ITransformingStep::Traits getTraits() }; } -static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) +static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params, size_t keys_size) { if (params.empty()) return block; @@ -48,6 +50,10 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa for (auto & col : block) res.insert(std::move(col)); + auto map_column = ColumnFixedString::create(keys_size + 1); + map_column->resize(rows); + res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); + return res; } @@ -63,7 +69,7 @@ AggregatingStep::AggregatingStep( bool storage_has_evenly_distributed_read_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_) - : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_), getTraits(), false) + : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_, params_.keys_size), getTraits(), false) , params(std::move(params_)) , grouping_sets_params(std::move(grouping_sets_params_)) , final(std::move(final_)) @@ -210,7 +216,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B /// Here we create a DAG which fills missing keys and adds `__grouping_set` column auto dag = std::make_shared(header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs index; - index.reserve(output_header.columns() + 1); + index.reserve(output_header.columns() + 2); auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); const auto * grouping_node = &dag->addColumn( @@ -237,6 +243,22 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); } + { + std::string grouping_map; + grouping_map.reserve(params.keys_size + 1); + std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); + for (auto key : params.keys) + grouping_map += key_set.contains(key) ? '1' : '0'; + grouping_map += '0'; + auto nested_column = ColumnFixedString::create(params.keys_size + 1); + nested_column->insertString(grouping_map); + auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); + const auto * grouping_map_node = &dag->addColumn( + {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); + grouping_map_node = &dag->materializeNode(*grouping_map_node); + index.push_back(grouping_map_node); + } + dag->getIndex().swap(index); auto expression = std::make_shared(dag, settings.getActionsSettings()); auto transform = std::make_shared(header, expression); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index d0840778c0f..99f3b86ac26 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + context, SizeLimits{}, 1, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference new file mode 100644 index 00000000000..5ea3ca4a15b --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -0,0 +1,81 @@ +0 2 +0 2 +0 4 +1 4 +2 4 +3 4 +4 4 +5 4 +6 4 +7 4 +8 4 +9 4 +0 1 +0 1 +0 4 +1 4 +2 4 +3 4 +4 4 +5 4 +6 4 +7 4 +8 4 +9 4 +0 0 +0 1 +0 1 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +0 +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 10 0 +0 1 4 +1 1 4 +2 1 4 +3 1 4 +4 1 4 +5 1 4 +6 1 4 +7 1 4 +8 1 4 +9 1 4 +0 1 6 +1 1 6 +2 1 6 +3 1 6 +4 1 6 +5 1 6 +6 1 6 +7 1 6 +8 1 6 +9 1 6 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +0 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql new file mode 100644 index 00000000000..65771fd479d --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -0,0 +1,79 @@ +SELECT + number, + grouping(number, number % 2, number % 3) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 3, number % 2) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 2, number % 3) = 2 AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, grouping(number, number % 2, number % 3) = 2; + +SELECT + number, + count(), + grouping(number, number % 2, number % 3) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number, number % 2), + () + ) +ORDER BY (gr, number); + +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2, number % 3) = 4 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2, number % 3) = 2 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; From 92575fc3e544121feafdcb9a86319ee57d9d393c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 12 May 2022 16:54:02 +0000 Subject: [PATCH 02/24] Add missing file --- src/Functions/grouping.cpp | 78 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/Functions/grouping.cpp diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp new file mode 100644 index 00000000000..19e810edbd2 --- /dev/null +++ b/src/Functions/grouping.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class FunctionGrouping : public IFunction +{ +public: + static constexpr auto name = "grouping"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + bool isVariadic() const override + { + return true; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + bool useDefaultImplementationForNulls() const override { return false; } + + bool isSuitableForConstantFolding() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + String getName() const override + { + return name; + } + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + //TODO: add assert for argument types + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); + + LOG_DEBUG(&Poco::Logger::get("Grouping"), "Args: {}, rows: {}", arguments.size(), arguments[1].column->getFamilyName()); + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + auto mask = grouping_set_column->getDataAt(i).toView(); + LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}", mask); + auto indexes = (*argument_keys_column)[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (mask[index.get()] == '1' ? 1 : 0); + LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}, Arg: {}, value: {}", mask, toString(indexes), value); + result->insert(Field(value)); + } + return result; + } + +}; + +void registerFunctionGrouping(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} From ae81268d4d057da377d13eb04f16047d0ff2acf9 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 13 May 2022 14:55:50 +0000 Subject: [PATCH 03/24] Try to compute helper column lazy --- src/Core/Names.h | 1 + src/Functions/grouping.cpp | 9 +++-- src/Interpreters/ActionsVisitor.cpp | 28 +++++++++++++- src/Interpreters/ActionsVisitor.h | 3 ++ src/Interpreters/ExpressionAnalyzer.cpp | 22 ++++++++--- src/Interpreters/ExpressionAnalyzer.h | 2 + src/Interpreters/InterpreterSelectQuery.cpp | 4 +- src/Processors/QueryPlan/AggregatingStep.cpp | 40 ++++++++++---------- src/Storages/VirtualColumnUtils.cpp | 2 +- 9 files changed, 79 insertions(+), 32 deletions(-) diff --git a/src/Core/Names.h b/src/Core/Names.h index 3281daa560e..003168bde27 100644 --- a/src/Core/Names.h +++ b/src/Core/Names.h @@ -16,6 +16,7 @@ using NameOrderedSet = std::set; using NameToNameMap = std::unordered_map; using NameToNameSetMap = std::unordered_map; using NameToNameVector = std::vector>; +using NameToIndexMap = std::unordered_map; using NameWithAlias = std::pair; using NamesWithAliases = std::vector; diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index 19e810edbd2..c1b349ce9da 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -1,5 +1,6 @@ #include #include +#include "Columns/ColumnsNumber.h" #include #include #include @@ -49,14 +50,16 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * grouping_set_column = checkAndGetColumn(arguments[0].column.get()); - auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); + auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); + auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); LOG_DEBUG(&Poco::Logger::get("Grouping"), "Args: {}, rows: {}", arguments.size(), arguments[1].column->getFamilyName()); auto result = std::make_shared()->createColumn(); for (size_t i = 0; i < input_rows_count; ++i) { - auto mask = grouping_set_column->getDataAt(i).toView(); + UInt64 set_index = grouping_set_column->get64(i); + auto mask = grouping_set_map_column->getDataAt(set_index).toView(); LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}", mask); auto indexes = (*argument_keys_column)[i].get(); UInt64 value = 0; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b7efbc97cc9..2e2cc49af58 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -11,6 +12,7 @@ #include #include +#include #include #include #include @@ -468,6 +470,7 @@ ActionsMatcher::Data::Data( size_t subquery_depth_, const NamesAndTypesList & source_columns_, const NamesAndTypesList & aggregation_keys_, + const ColumnNumbersList & grouping_set_keys_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, @@ -480,6 +483,7 @@ ActionsMatcher::Data::Data( , subquery_depth(subquery_depth_) , source_columns(source_columns_) , aggregation_keys(aggregation_keys_) + , grouping_set_keys(grouping_set_keys_) , prepared_sets(prepared_sets_) , subqueries_for_sets(subqueries_for_sets_) , no_subqueries(no_subqueries_) @@ -834,6 +838,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { auto arguments_column_name = data.getUniqueName("__grouping_args"); { + if (!data.hasColumn("__grouping_set_map")) + { + ColumnWithTypeAndName column; + column.name = "__grouping_set_map"; + size_t map_size = data.aggregation_keys.size() + 1; + column.type = std::make_shared(std::make_shared(map_size)); + Array maps_per_set; + for (auto & grouping_set : data.grouping_set_keys) + { + std::string key_map(map_size, '0'); + for (auto index : grouping_set) + key_map[index] = '1'; + auto map_column = ColumnFixedString::create(map_size); + map_column->insertString(key_map); + maps_per_set.push_back(key_map); + } + auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); + grouping_set_map_column->insert(maps_per_set); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + + data.addColumn(column); + } ColumnWithTypeAndName column; column.name = arguments_column_name; column.type = std::make_shared(std::make_shared()); @@ -853,7 +879,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addFunction( FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set_map", arguments_column_name }, + { "__grouping_set", "__grouping_set_map", arguments_column_name }, column_name ); return; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 313eae9fc8d..b7d2905ac73 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -89,6 +90,7 @@ public: size_t subquery_depth; const NamesAndTypesList & source_columns; const NamesAndTypesList & aggregation_keys; + const ColumnNumbersList & grouping_set_keys; PreparedSets & prepared_sets; SubqueriesForSets & subqueries_for_sets; bool no_subqueries; @@ -111,6 +113,7 @@ public: size_t subquery_depth_, const NamesAndTypesList & source_columns_, const NamesAndTypesList & aggregation_keys_, + const ColumnNumbersList & grouping_set_keys_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 8c3ea878718..1a2cb4ace1a 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -43,6 +43,8 @@ #include #include +#include +#include #include #include @@ -326,7 +328,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) { if (ASTPtr group_by_ast = select_query->groupBy()) { - NameSet unique_keys; + NameToIndexMap unique_keys; ASTs & group_asts = group_by_ast->children; /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column @@ -348,6 +350,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) group_elements_ast = group_ast_element->children; NamesAndTypesList grouping_set_list; + ColumnNumbers grouping_set_indexes_list; for (ssize_t j = 0; j < ssize_t(group_elements_ast.size()); ++j) { @@ -388,15 +391,21 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// Aggregation keys are unique. if (!unique_keys.contains(key.name)) { - unique_keys.insert(key.name); + unique_keys[key.name] = aggregation_keys.size(); + grouping_set_indexes_list.push_back(aggregation_keys.size()); aggregation_keys.push_back(key); /// Key is no longer needed, therefore we can save a little by moving it. aggregated_columns.push_back(std::move(key)); } + else + { + grouping_set_indexes_list.push_back(unique_keys[key.name]); + } } aggregation_keys_list.push_back(std::move(grouping_set_list)); + aggregation_keys_indexes_list.push_back(std::move(grouping_set_indexes_list)); } else { @@ -434,7 +443,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// Aggregation keys are uniqued. if (!unique_keys.contains(key.name)) { - unique_keys.insert(key.name); + unique_keys[key.name] = aggregation_keys.size(); aggregation_keys.push_back(key); /// Key is no longer needed, therefore we can save a little by moving it. @@ -443,8 +452,8 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) - aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); + // if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + // aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); if (group_asts.empty()) { @@ -582,6 +591,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ subquery_depth, sourceColumns(), aggregation_keys, + aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -603,6 +613,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP subquery_depth, sourceColumns(), aggregation_keys, + aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -625,6 +636,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( subquery_depth, sourceColumns(), aggregation_keys, + aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index b3704095c92..1200091efef 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -65,6 +66,7 @@ struct ExpressionAnalyzerData bool has_aggregation = false; NamesAndTypesList aggregation_keys; NamesAndTypesLists aggregation_keys_list; + ColumnNumbersList aggregation_keys_indexes_list; bool has_const_aggregation_keys = false; AggregateDescriptions aggregate_descriptions; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c8e04777574..5f165f9d535 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -757,8 +757,8 @@ Block InterpreterSelectQuery::getSampleBlockImpl() res.insert({nullptr, type, aggregate.column_name}); } - if (analysis_result.use_grouping_set_key) - res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); + // if (analysis_result.use_grouping_set_key) + // res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); return res; } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 87588facff2..9c2b5a44914 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -35,7 +35,7 @@ static ITransformingStep::Traits getTraits() }; } -static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params, size_t keys_size) +static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) { if (params.empty()) return block; @@ -50,9 +50,9 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa for (auto & col : block) res.insert(std::move(col)); - auto map_column = ColumnFixedString::create(keys_size + 1); - map_column->resize(rows); - res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); + // auto map_column = ColumnFixedString::create(keys_size + 1); + // map_column->resize(rows); + // res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); return res; } @@ -69,7 +69,7 @@ AggregatingStep::AggregatingStep( bool storage_has_evenly_distributed_read_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_) - : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_, params_.keys_size), getTraits(), false) + : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_), getTraits(), false) , params(std::move(params_)) , grouping_sets_params(std::move(grouping_sets_params_)) , final(std::move(final_)) @@ -243,21 +243,21 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); } - { - std::string grouping_map; - grouping_map.reserve(params.keys_size + 1); - std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); - for (auto key : params.keys) - grouping_map += key_set.contains(key) ? '1' : '0'; - grouping_map += '0'; - auto nested_column = ColumnFixedString::create(params.keys_size + 1); - nested_column->insertString(grouping_map); - auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); - const auto * grouping_map_node = &dag->addColumn( - {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); - grouping_map_node = &dag->materializeNode(*grouping_map_node); - index.push_back(grouping_map_node); - } + // { + // std::string grouping_map; + // grouping_map.reserve(params.keys_size + 1); + // std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); + // for (auto key : params.keys) + // grouping_map += key_set.contains(key) ? '1' : '0'; + // grouping_map += '0'; + // auto nested_column = ColumnFixedString::create(params.keys_size + 1); + // nested_column->insertString(grouping_map); + // auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); + // const auto * grouping_map_node = &dag->addColumn( + // {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); + // grouping_map_node = &dag->materializeNode(*grouping_map_node); + // index.push_back(grouping_map_node); + // } dag->getIndex().swap(index); auto expression = std::make_shared(dag, settings.getActionsSettings()); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 99f3b86ac26..ef25612f63e 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); From efb30bdf6446be2364aff687ed590cc8a02c8c46 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 13 May 2022 18:20:12 +0000 Subject: [PATCH 04/24] Correctly use __grouping_set_map column --- src/Functions/grouping.cpp | 12 +++++------- src/Interpreters/ActionsVisitor.cpp | 2 -- .../0_stateless/02293_grouping_function.reference | 12 ++++++++++++ .../queries/0_stateless/02293_grouping_function.sql | 10 ++++++++++ 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index c1b349ce9da..1849bd0e9a5 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -1,14 +1,11 @@ #include -#include #include "Columns/ColumnsNumber.h" #include #include -#include #include #include #include #include -#include namespace DB { @@ -54,18 +51,19 @@ public: auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); - LOG_DEBUG(&Poco::Logger::get("Grouping"), "Args: {}, rows: {}", arguments.size(), arguments[1].column->getFamilyName()); + auto masks = (*grouping_set_map_column)[0].get(); + auto result = std::make_shared()->createColumn(); for (size_t i = 0; i < input_rows_count; ++i) { UInt64 set_index = grouping_set_column->get64(i); - auto mask = grouping_set_map_column->getDataAt(set_index).toView(); - LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}", mask); + auto mask = masks[set_index].get(); + auto indexes = (*argument_keys_column)[i].get(); UInt64 value = 0; for (auto index : indexes) value = (value << 1) + (mask[index.get()] == '1' ? 1 : 0); - LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}, Arg: {}, value: {}", mask, toString(indexes), value); + result->insert(Field(value)); } return result; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 2e2cc49af58..40a5f055243 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -850,8 +850,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & std::string key_map(map_size, '0'); for (auto index : grouping_set) key_map[index] = '1'; - auto map_column = ColumnFixedString::create(map_size); - map_column->insertString(key_map); maps_per_set.push_back(key_map); } auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference index 5ea3ca4a15b..f08e6d0ea99 100644 --- a/tests/queries/0_stateless/02293_grouping_function.reference +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -79,3 +79,15 @@ 9 0 0 +0 0 +0 1 +0 1 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql index 65771fd479d..3555f9dabab 100644 --- a/tests/queries/0_stateless/02293_grouping_function.sql +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -77,3 +77,13 @@ GROUP BY HAVING grouping(number, number % 2, number % 3) = 2 ORDER BY number SETTINGS enable_optimize_predicate_expression = 0; + +SELECT + number, + GROUPING(number, number % 2, number % 3) = 2 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2)) +ORDER BY number, gr; From 6fc7dfea809eabb81111306d6dcb4f2ce53e53a5 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 13 May 2022 23:04:12 +0000 Subject: [PATCH 05/24] Support ordinary GROUP BY --- src/Functions/grouping.cpp | 23 ++++++++ src/Interpreters/ActionsVisitor.cpp | 56 +++++++++++++------ src/Interpreters/ActionsVisitor.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 21 +++++-- src/Interpreters/ExpressionAnalyzer.h | 2 + src/Interpreters/InterpreterSelectQuery.cpp | 4 -- src/Parsers/ASTSelectQuery.h | 2 + src/Processors/QueryPlan/AggregatingStep.cpp | 20 ------- src/Storages/VirtualColumnUtils.cpp | 2 +- ...02293_grouping_function_group_by.reference | 20 +++++++ .../02293_grouping_function_group_by.sql | 18 ++++++ 11 files changed, 123 insertions(+), 49 deletions(-) create mode 100644 tests/queries/0_stateless/02293_grouping_function_group_by.reference create mode 100644 tests/queries/0_stateless/02293_grouping_function_group_by.sql diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index 1849bd0e9a5..eb63764947c 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -45,8 +45,31 @@ public: return std::make_shared(); } + ColumnPtr executeSingleGroupingSet(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + auto grouping_set_map_column = checkAndGetColumnConst(arguments[0].column.get()); + auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); + + auto aggregation_keys_number = (*grouping_set_map_column)[0].get(); + + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + auto indexes = (*argument_keys_column)[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (index.get() < aggregation_keys_number ? 1 : 0); + + result->insert(Field(value)); + } + return result; + } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (arguments.size() == 2) + return executeSingleGroupingSet(arguments, input_rows_count); + auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 40a5f055243..5bececb70ae 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -477,7 +477,8 @@ ActionsMatcher::Data::Data( bool no_subqueries_, bool no_makeset_, bool only_consts_, - bool create_source_for_in_) + bool create_source_for_in_, + bool has_grouping_set_column_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -490,6 +491,7 @@ ActionsMatcher::Data::Data( , no_makeset(no_makeset_) , only_consts(only_consts_) , create_source_for_in(create_source_for_in_) + , has_grouping_set_column(has_grouping_set_column_) , visit_depth(0) , actions_stack(std::move(actions_dag), context_) , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) @@ -842,19 +844,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { ColumnWithTypeAndName column; column.name = "__grouping_set_map"; - size_t map_size = data.aggregation_keys.size() + 1; - column.type = std::make_shared(std::make_shared(map_size)); - Array maps_per_set; - for (auto & grouping_set : data.grouping_set_keys) + if (data.has_grouping_set_column) { - std::string key_map(map_size, '0'); - for (auto index : grouping_set) - key_map[index] = '1'; - maps_per_set.push_back(key_map); + size_t map_size = data.aggregation_keys.size() + 1; + column.type = std::make_shared(std::make_shared(map_size)); + Array maps_per_set; + for (auto & grouping_set : data.grouping_set_keys) + { + std::string key_map(map_size, '0'); + for (auto index : grouping_set) + key_map[index] = '1'; + maps_per_set.push_back(key_map); + } + auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); + grouping_set_map_column->insert(maps_per_set); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + } + else + { + column.type = std::make_shared(); + auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); } - auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); - grouping_set_map_column->insert(maps_per_set); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); data.addColumn(column); } @@ -875,11 +886,22 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addColumn(column); } - data.addFunction( - FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set", "__grouping_set_map", arguments_column_name }, - column_name - ); + if (data.has_grouping_set_column) + { + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set", "__grouping_set_map", arguments_column_name }, + column_name + ); + } + else + { + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set_map", arguments_column_name }, + column_name + ); + } return; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index b7d2905ac73..3f7f6b5b127 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -97,6 +97,7 @@ public: bool no_makeset; bool only_consts; bool create_source_for_in; + bool has_grouping_set_column; size_t visit_depth; ScopeStack actions_stack; @@ -120,7 +121,8 @@ public: bool no_subqueries_, bool no_makeset_, bool only_consts_, - bool create_source_for_in_); + bool create_source_for_in_, + bool has_grouping_set_column_); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1a2cb4ace1a..9c74693e6a2 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -333,8 +333,10 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column /// With set number, which is used as an additional key at the stage of merging aggregating data. - if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + bool process_grouping_sets = select_query->group_by_with_grouping_sets && group_asts.size() > 1; + if (process_grouping_sets) aggregated_columns.emplace_back("__grouping_set", std::make_shared()); + need_grouping_set_column = select_query->group_by_with_rollup || select_query->group_by_with_cube || process_grouping_sets; for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { @@ -452,8 +454,12 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - // if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) - // aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); + if (!select_query->group_by_with_grouping_sets) + { + auto & list = aggregation_keys_indexes_list.emplace_back(); + for (size_t i = 0; i < aggregation_keys.size(); ++i) + list.push_back(i); + } if (group_asts.empty()) { @@ -598,7 +604,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ no_makeset_for_subqueries, false /* no_makeset */, only_consts, - !isRemoteStorage() /* create_source_for_in */); + !isRemoteStorage() /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -620,7 +627,8 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */, true /* no_makeset */, only_consts, - !isRemoteStorage() /* create_source_for_in */); + !isRemoteStorage() /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -643,7 +651,8 @@ void ExpressionAnalyzer::getRootActionsForHaving( no_makeset_for_subqueries, false /* no_makeset */, only_consts, - true /* create_source_for_in */); + true /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 1200091efef..5db4fda0fcf 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -77,6 +77,8 @@ struct ExpressionAnalyzerData /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. TemporaryTablesMapping external_tables; + + bool need_grouping_set_column = false; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5f165f9d535..6bfadc66352 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -757,9 +756,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl() res.insert({nullptr, type, aggregate.column_name}); } - // if (analysis_result.use_grouping_set_key) - // res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); - return res; } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 704aeeeea7c..b3f53de3c99 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -89,6 +89,8 @@ public: bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool needGroupingSetColumn() const noexcept { return group_by_with_cube || group_by_with_rollup || group_by_with_grouping_sets; } + ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 9c2b5a44914..b830c7899bb 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -50,10 +50,6 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa for (auto & col : block) res.insert(std::move(col)); - // auto map_column = ColumnFixedString::create(keys_size + 1); - // map_column->resize(rows); - // res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); - return res; } @@ -243,22 +239,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); } - // { - // std::string grouping_map; - // grouping_map.reserve(params.keys_size + 1); - // std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); - // for (auto key : params.keys) - // grouping_map += key_set.contains(key) ? '1' : '0'; - // grouping_map += '0'; - // auto nested_column = ColumnFixedString::create(params.keys_size + 1); - // nested_column->insertString(grouping_map); - // auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); - // const auto * grouping_map_node = &dag->addColumn( - // {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); - // grouping_map_node = &dag->materializeNode(*grouping_map_node); - // index.push_back(grouping_map_node); - // } - dag->getIndex().swap(index); auto expression = std::make_shared(dag, settings.getActionsSettings()); auto transform = std::make_shared(header, expression); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ef25612f63e..c1824206b60 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, false); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference new file mode 100644 index 00000000000..38578d6ad1d --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -0,0 +1,20 @@ +0 1 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +0 1 1 +1 1 1 +2 1 1 +3 1 1 +4 1 1 +5 1 1 +6 1 1 +7 1 1 +8 1 1 +9 1 1 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql new file mode 100644 index 00000000000..5b12c34adac --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -0,0 +1,18 @@ +SELECT + number, + grouping(number, number % 2, number % 3) = 6 +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; + +SELECT + number, + grouping(number), + GROUPING(number % 2) +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; From e5b395e0542ad723a2ead3f0702f0f6f4203dbdc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 16 May 2022 17:33:38 +0000 Subject: [PATCH 06/24] Support ROLLUP and CUBE in GROUPING function --- src/Functions/grouping.cpp | 65 ++++++++++- src/Interpreters/ActionsVisitor.cpp | 63 ++++++---- src/Interpreters/ActionsVisitor.h | 13 ++- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++- src/Interpreters/ExpressionAnalyzer.h | 3 +- src/Processors/QueryPlan/AggregatingStep.cpp | 11 ++ src/Processors/QueryPlan/AggregatingStep.h | 2 + src/Processors/QueryPlan/CubeStep.cpp | 3 +- src/Processors/QueryPlan/RollupStep.cpp | 3 +- src/Processors/Transforms/CubeTransform.cpp | 5 +- src/Processors/Transforms/CubeTransform.h | 1 + src/Processors/Transforms/RollupTransform.cpp | 5 +- src/Processors/Transforms/RollupTransform.h | 1 + src/Storages/VirtualColumnUtils.cpp | 2 +- ...02293_grouping_function_group_by.reference | 108 ++++++++++++++++++ .../02293_grouping_function_group_by.sql | 50 ++++++++ 16 files changed, 315 insertions(+), 39 deletions(-) diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index eb63764947c..c6c6061307d 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -1,8 +1,11 @@ #include -#include "Columns/ColumnsNumber.h" +#include #include #include +#include #include +#include +#include #include #include #include @@ -45,7 +48,7 @@ public: return std::make_shared(); } - ColumnPtr executeSingleGroupingSet(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeOrdinaryGroupBy(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { auto grouping_set_map_column = checkAndGetColumnConst(arguments[0].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); @@ -65,16 +68,70 @@ public: return result; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeRollup( + const ColumnUInt64 * grouping_set_column, + const ColumnConst & argument_keys_column, + UInt64 keys, + size_t input_rows_count) const + { + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + UInt64 set_index = grouping_set_column->get64(i); + + auto indexes = argument_keys_column[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (index.get() < keys - set_index ? 1 : 0); + + result->insert(Field(value)); + } + return result; + } + + ColumnPtr executeCube( + const ColumnUInt64 * grouping_set_column, + const ColumnConst & argument_keys_column, + UInt64 keys, + size_t input_rows_count) const + { + static constexpr auto ONE = static_cast(1); + auto result = std::make_shared()->createColumn(); + auto mask_base = (ONE << keys) - 1; + for (size_t i = 0; i < input_rows_count; ++i) + { + UInt64 set_index = grouping_set_column->get64(i); + auto mask = mask_base - set_index; + auto indexes = argument_keys_column[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (mask & (ONE << (keys - index.get() - 1)) ? 1 : 0); + + result->insert(Field(value)); + } + return result; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t input_rows_count) const override { if (arguments.size() == 2) - return executeSingleGroupingSet(arguments, input_rows_count); + return executeOrdinaryGroupBy(arguments, input_rows_count); auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); auto masks = (*grouping_set_map_column)[0].get(); + auto grouping_set_map_elem_type = applyVisitor(FieldToDataType(), masks[0]); + if (!isString(grouping_set_map_elem_type)) + { + bool is_rollup = masks[0].get() == 0; + auto keys = masks[1].get(); + if (is_rollup) + return executeRollup(grouping_set_column, *argument_keys_column, keys, input_rows_count); + else + return executeCube(grouping_set_column, *argument_keys_column, keys, input_rows_count); + } auto result = std::make_shared()->createColumn(); for (size_t i = 0; i < input_rows_count; ++i) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 5bececb70ae..70493e5fefc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -62,6 +62,7 @@ namespace ErrorCodes extern const int INCORRECT_ELEMENT_OF_SET; extern const int BAD_ARGUMENTS; extern const int DUPLICATE_COLUMN; + extern const int LOGICAL_ERROR; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -478,7 +479,7 @@ ActionsMatcher::Data::Data( bool no_makeset_, bool only_consts_, bool create_source_for_in_, - bool has_grouping_set_column_) + GroupByKind group_by_kind_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -491,7 +492,7 @@ ActionsMatcher::Data::Data( , no_makeset(no_makeset_) , only_consts(only_consts_) , create_source_for_in(create_source_for_in_) - , has_grouping_set_column(has_grouping_set_column_) + , group_by_kind(group_by_kind_) , visit_depth(0) , actions_stack(std::move(actions_dag), context_) , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) @@ -844,27 +845,47 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { ColumnWithTypeAndName column; column.name = "__grouping_set_map"; - if (data.has_grouping_set_column) + switch (data.group_by_kind) { - size_t map_size = data.aggregation_keys.size() + 1; - column.type = std::make_shared(std::make_shared(map_size)); - Array maps_per_set; - for (auto & grouping_set : data.grouping_set_keys) + case GroupByKind::GROUPING_SETS: { - std::string key_map(map_size, '0'); - for (auto index : grouping_set) - key_map[index] = '1'; - maps_per_set.push_back(key_map); + size_t map_size = data.aggregation_keys.size() + 1; + column.type = std::make_shared(std::make_shared(map_size)); + Array maps_per_set; + for (auto & grouping_set : data.grouping_set_keys) + { + std::string key_map(map_size, '0'); + for (auto index : grouping_set) + key_map[index] = '1'; + maps_per_set.push_back(key_map); + } + auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); + grouping_set_map_column->insert(maps_per_set); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + break; } - auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); - grouping_set_map_column->insert(maps_per_set); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - } - else - { - column.type = std::make_shared(); - auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + case GroupByKind::ROLLUP: + case GroupByKind::CUBE: + { + column.type = std::make_shared(std::make_shared()); + auto grouping_set_map_column = ColumnArray::create(ColumnUInt64::create()); + Array kind_and_keys_size; + kind_and_keys_size.push_back(data.group_by_kind == GroupByKind::ROLLUP ? 0 : 1); + kind_and_keys_size.push_back(data.aggregation_keys.size()); + grouping_set_map_column->insert(kind_and_keys_size); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + break; + } + case GroupByKind::ORDINARY: + { + column.type = std::make_shared(); + auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); } data.addColumn(column); @@ -886,7 +907,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addColumn(column); } - if (data.has_grouping_set_column) + if (data.group_by_kind != GroupByKind::ORDINARY) { data.addFunction( FunctionFactory::instance().get("grouping", data.getContext()), diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 3f7f6b5b127..5fd228ba836 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -78,6 +78,15 @@ class ASTIdentifier; class ASTFunction; class ASTLiteral; +enum class GroupByKind +{ + NONE, + ORDINARY, + ROLLUP, + CUBE, + GROUPING_SETS, +}; + /// Collect ExpressionAction from AST. Returns PreparedSets and SubqueriesForSets too. class ActionsMatcher { @@ -97,7 +106,7 @@ public: bool no_makeset; bool only_consts; bool create_source_for_in; - bool has_grouping_set_column; + GroupByKind group_by_kind; size_t visit_depth; ScopeStack actions_stack; @@ -122,7 +131,7 @@ public: bool no_makeset_, bool only_consts_, bool create_source_for_in_, - bool has_grouping_set_column_); + GroupByKind group_by_kind_); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9c74693e6a2..f7f67c28f93 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -331,12 +331,19 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) NameToIndexMap unique_keys; ASTs & group_asts = group_by_ast->children; + if (select_query->group_by_with_rollup) + group_by_kind = GroupByKind::ROLLUP; + else if (select_query->group_by_with_cube) + group_by_kind = GroupByKind::CUBE; + else if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + group_by_kind = GroupByKind::GROUPING_SETS; + else + group_by_kind = GroupByKind::ORDINARY; + /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column /// With set number, which is used as an additional key at the stage of merging aggregating data. - bool process_grouping_sets = select_query->group_by_with_grouping_sets && group_asts.size() > 1; - if (process_grouping_sets) + if (group_by_kind != GroupByKind::ORDINARY) aggregated_columns.emplace_back("__grouping_set", std::make_shared()); - need_grouping_set_column = select_query->group_by_with_rollup || select_query->group_by_with_cube || process_grouping_sets; for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { @@ -605,7 +612,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ false /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - need_grouping_set_column); + group_by_kind); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -628,7 +635,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - need_grouping_set_column); + group_by_kind); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -652,7 +659,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( false /* no_makeset */, only_consts, true /* create_source_for_in */, - need_grouping_set_column); + group_by_kind); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 5db4fda0fcf..fb28c08ad23 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -78,7 +79,7 @@ struct ExpressionAnalyzerData /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. TemporaryTablesMapping external_tables; - bool need_grouping_set_column = false; + GroupByKind group_by_kind = GroupByKind::NONE; }; diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index b830c7899bb..0028088d03f 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -35,6 +35,17 @@ static ITransformingStep::Traits getTraits() }; } +Block appendGroupingSetColumn(Block header) +{ + Block res; + res.insert({std::make_shared(), "__grouping_set"}); + + for (auto & col : header) + res.insert(std::move(col)); + + return res; +} + static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) { if (params.empty()) diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index b933daaa474..4dd3d956350 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -25,6 +25,8 @@ struct GroupingSetsParams using GroupingSetsParamsList = std::vector; +Block appendGroupingSetColumn(Block header); + /// Aggregation. See AggregatingTransform. class AggregatingStep : public ITransformingStep { diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 23c5115ec68..43a6491157a 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB @@ -22,7 +23,7 @@ static ITransformingStep::Traits getTraits() } CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) - : ITransformingStep(input_stream_, params_->getHeader(), getTraits()) + : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) , params(std::move(params_)) { /// Aggregation keys are distinct diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index acaeb2bc9a7..2961ef5ddbd 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -22,7 +23,7 @@ static ITransformingStep::Traits getTraits() } RollupStep::RollupStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) - : ITransformingStep(input_stream_, params_->getHeader(), getTraits()) + : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) , params(std::move(params_)) { /// Aggregation keys are distinct diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index 456eccc732f..f185e7565ea 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -9,7 +10,7 @@ namespace ErrorCodes } CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_) - : IAccumulatingTransform(std::move(header), params_->getHeader()) + : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) , params(std::move(params_)) , keys(params->params.keys) { @@ -74,6 +75,8 @@ Chunk CubeTransform::generate() } finalizeChunk(gen_chunk); + if (!gen_chunk.empty()) + gen_chunk.addColumn(0, ColumnUInt64::create(gen_chunk.getNumRows(), grouping_set++)); return gen_chunk; } diff --git a/src/Processors/Transforms/CubeTransform.h b/src/Processors/Transforms/CubeTransform.h index 6d0e2338174..b6f60af6aca 100644 --- a/src/Processors/Transforms/CubeTransform.h +++ b/src/Processors/Transforms/CubeTransform.h @@ -28,6 +28,7 @@ private: Columns current_zero_columns; UInt64 mask = 0; + UInt64 grouping_set = 0; Chunk merge(Chunks && chunks, bool final); }; diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp index fb51b5f6b45..2355e35f8fd 100644 --- a/src/Processors/Transforms/RollupTransform.cpp +++ b/src/Processors/Transforms/RollupTransform.cpp @@ -1,11 +1,12 @@ #include #include +#include namespace DB { RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr params_) - : IAccumulatingTransform(std::move(header), params_->getHeader()) + : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) , params(std::move(params_)) , keys(params->params.keys) { @@ -57,6 +58,8 @@ Chunk RollupTransform::generate() } finalizeChunk(gen_chunk); + if (!gen_chunk.empty()) + gen_chunk.addColumn(0, ColumnUInt64::create(gen_chunk.getNumRows(), set_counter++)); return gen_chunk; } diff --git a/src/Processors/Transforms/RollupTransform.h b/src/Processors/Transforms/RollupTransform.h index fd435740a63..e60c7e12de1 100644 --- a/src/Processors/Transforms/RollupTransform.h +++ b/src/Processors/Transforms/RollupTransform.h @@ -23,6 +23,7 @@ private: Chunks consumed_chunks; Chunk rollup_chunk; size_t last_removed_key = 0; + size_t set_counter = 0; Chunk merge(Chunks && chunks, bool final); }; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index c1824206b60..dd6c30e3c79 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, false); + context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, GroupByKind::NONE); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 38578d6ad1d..0285611b9fa 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -18,3 +18,111 @@ 7 1 1 8 1 1 9 1 1 +0 0 +0 4 +0 6 +1 4 +1 6 +2 4 +2 6 +3 4 +3 6 +4 4 +4 6 +5 4 +5 6 +6 4 +6 6 +7 4 +7 6 +8 4 +8 6 +9 4 +9 6 +0 0 +0 4 +0 6 +1 4 +1 6 +2 4 +2 6 +3 4 +3 6 +4 4 +4 6 +5 4 +5 6 +6 4 +6 6 +7 4 +7 6 +8 4 +8 6 +9 4 +9 6 +0 0 +0 1 +0 1 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 +0 0 +0 1 +0 1 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 +0 5 +0 6 +1 5 +1 6 +2 5 +2 6 +3 5 +3 6 +4 5 +4 6 +5 5 +5 6 +6 5 +6 6 +7 5 +7 6 +8 5 +8 6 +9 5 +9 6 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index 5b12c34adac..1b0fcdb9289 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -16,3 +16,53 @@ GROUP BY number, number % 2 ORDER BY number; + +SELECT + number, + grouping(number, number % 2, number % 3) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH ROLLUP +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2, number % 3) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH CUBE +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) + 3 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +HAVING grouping(number) != 0 +ORDER BY + number, gr; From 6356112a76c998e75ebfb6d1dbd9eeff593e200a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 18 May 2022 15:23:31 +0000 Subject: [PATCH 07/24] Refactor GROUPING function --- src/Core/ColumnNumbers.h | 3 + src/Functions/grouping.cpp | 159 ------------------ src/Functions/grouping.h | 151 +++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 - src/Interpreters/ActionsVisitor.cpp | 110 ++++-------- .../02293_grouping_function.reference | 84 ++++----- .../0_stateless/02293_grouping_function.sql | 27 ++- ...02293_grouping_function_group_by.reference | 80 ++++----- .../02293_grouping_function_group_by.sql | 13 +- 9 files changed, 297 insertions(+), 332 deletions(-) delete mode 100644 src/Functions/grouping.cpp create mode 100644 src/Functions/grouping.h diff --git a/src/Core/ColumnNumbers.h b/src/Core/ColumnNumbers.h index 29b4c49dc83..2c1f02f720d 100644 --- a/src/Core/ColumnNumbers.h +++ b/src/Core/ColumnNumbers.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -8,6 +9,8 @@ namespace DB { using ColumnNumbers = std::vector; +using ColumnNumbersSet = std::unordered_set; using ColumnNumbersList = std::vector; +using ColumnNumbersSetList = std::vector; } diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp deleted file mode 100644 index c6c6061307d..00000000000 --- a/src/Functions/grouping.cpp +++ /dev/null @@ -1,159 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class FunctionGrouping : public IFunction -{ -public: - static constexpr auto name = "grouping"; - static FunctionPtr create(ContextPtr) - { - return std::make_shared(); - } - - bool isVariadic() const override - { - return true; - } - - size_t getNumberOfArguments() const override - { - return 0; - } - - bool useDefaultImplementationForNulls() const override { return false; } - - bool isSuitableForConstantFolding() const override { return false; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - String getName() const override - { - return name; - } - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - //TODO: add assert for argument types - return std::make_shared(); - } - - ColumnPtr executeOrdinaryGroupBy(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const - { - auto grouping_set_map_column = checkAndGetColumnConst(arguments[0].column.get()); - auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); - - auto aggregation_keys_number = (*grouping_set_map_column)[0].get(); - - auto result = std::make_shared()->createColumn(); - for (size_t i = 0; i < input_rows_count; ++i) - { - auto indexes = (*argument_keys_column)[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (index.get() < aggregation_keys_number ? 1 : 0); - - result->insert(Field(value)); - } - return result; - } - - ColumnPtr executeRollup( - const ColumnUInt64 * grouping_set_column, - const ColumnConst & argument_keys_column, - UInt64 keys, - size_t input_rows_count) const - { - auto result = std::make_shared()->createColumn(); - for (size_t i = 0; i < input_rows_count; ++i) - { - UInt64 set_index = grouping_set_column->get64(i); - - auto indexes = argument_keys_column[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (index.get() < keys - set_index ? 1 : 0); - - result->insert(Field(value)); - } - return result; - } - - ColumnPtr executeCube( - const ColumnUInt64 * grouping_set_column, - const ColumnConst & argument_keys_column, - UInt64 keys, - size_t input_rows_count) const - { - static constexpr auto ONE = static_cast(1); - auto result = std::make_shared()->createColumn(); - auto mask_base = (ONE << keys) - 1; - for (size_t i = 0; i < input_rows_count; ++i) - { - UInt64 set_index = grouping_set_column->get64(i); - auto mask = mask_base - set_index; - auto indexes = argument_keys_column[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (mask & (ONE << (keys - index.get() - 1)) ? 1 : 0); - - result->insert(Field(value)); - } - return result; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t input_rows_count) const override - { - if (arguments.size() == 2) - return executeOrdinaryGroupBy(arguments, input_rows_count); - - auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); - auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); - auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); - - auto masks = (*grouping_set_map_column)[0].get(); - auto grouping_set_map_elem_type = applyVisitor(FieldToDataType(), masks[0]); - if (!isString(grouping_set_map_elem_type)) - { - bool is_rollup = masks[0].get() == 0; - auto keys = masks[1].get(); - if (is_rollup) - return executeRollup(grouping_set_column, *argument_keys_column, keys, input_rows_count); - else - return executeCube(grouping_set_column, *argument_keys_column, keys, input_rows_count); - } - - auto result = std::make_shared()->createColumn(); - for (size_t i = 0; i < input_rows_count; ++i) - { - UInt64 set_index = grouping_set_column->get64(i); - auto mask = masks[set_index].get(); - - auto indexes = (*argument_keys_column)[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (mask[index.get()] == '1' ? 1 : 0); - - result->insert(Field(value)); - } - return result; - } - -}; - -void registerFunctionGrouping(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h new file mode 100644 index 00000000000..a881616812b --- /dev/null +++ b/src/Functions/grouping.h @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "Core/ColumnNumbers.h" +#include "DataTypes/Serializations/ISerialization.h" +#include "base/types.h" + +namespace DB +{ + +class FunctionGroupingBase : public IFunction +{ +protected: + static constexpr UInt64 ONE = 1; + + const ColumnNumbers arguments_indexes; + +public: + FunctionGroupingBase(ColumnNumbers arguments_indexes_) + : arguments_indexes(std::move(arguments_indexes_)) + {} + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForNulls() const override { return false; } + + bool isSuitableForConstantFolding() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + template + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count, AggregationKeyChecker checker) const + { + auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + UInt64 set_index = grouping_set_column->get64(i); + + UInt64 value = 0; + for (auto index : arguments_indexes) + value = (value << 1) + (checker(set_index, index) ? 1 : 0); + + result->insert(Field(value)); + } + return result; + } +}; + +class FunctionGroupingOrdinary : public FunctionGroupingBase +{ +public: + explicit FunctionGroupingOrdinary(ColumnNumbers arguments_indexes_) + : FunctionGroupingBase(std::move(arguments_indexes_)) + {} + + String getName() const override { return "groupingOrdinary"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + UInt64 value = (ONE << arguments_indexes.size()) - 1; + return ColumnUInt64::create(input_rows_count, value); + } +}; + +class FunctionGroupingForRollup : public FunctionGroupingBase +{ + const UInt64 aggregation_keys_number; + +public: + FunctionGroupingForRollup(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_) + : FunctionGroupingBase(std::move(arguments_indexes_)) + , aggregation_keys_number(aggregation_keys_number_) + {} + + String getName() const override { return "groupingForRollup"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + return FunctionGroupingBase::executeImpl(arguments, input_rows_count, + [this](UInt64 set_index, UInt64 arg_index) + { + return arg_index < aggregation_keys_number - set_index; + } + ); + } +}; + +class FunctionGroupingForCube : public FunctionGroupingBase +{ + const UInt64 aggregation_keys_number; + +public: + + FunctionGroupingForCube(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_) + : FunctionGroupingBase(arguments_indexes_) + , aggregation_keys_number(aggregation_keys_number_) + {} + + String getName() const override { return "groupingForCube"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + return FunctionGroupingBase::executeImpl(arguments, input_rows_count, + [this](UInt64 set_index, UInt64 arg_index) + { + auto set_mask = (ONE << aggregation_keys_number) - 1 - set_index; + return set_mask & (ONE << (aggregation_keys_number - arg_index - 1)); + } + ); + } +}; + +class FunctionGroupingForGroupingSets : public FunctionGroupingBase +{ + ColumnNumbersSetList grouping_sets; +public: + FunctionGroupingForGroupingSets(ColumnNumbers arguments_indexes_, ColumnNumbersList const & grouping_sets_) + : FunctionGroupingBase(std::move(arguments_indexes_)) + { + for (auto const & set : grouping_sets_) + grouping_sets.emplace_back(set.begin(), set.end()); + } + + String getName() const override { return "groupingForGroupingSets"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + return FunctionGroupingBase::executeImpl(arguments, input_rows_count, + [this](UInt64 set_index, UInt64 arg_index) + { + return grouping_sets[set_index].contains(arg_index); + } + ); + } +}; + +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 9fe1fa69b5e..9cd9c70da16 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -83,7 +83,6 @@ void registerFunctionZooKeeperSessionUptime(FunctionFactory &); void registerFunctionGetOSKernelVersion(FunctionFactory &); void registerFunctionGetTypeSerializationStreams(FunctionFactory &); void registerFunctionFlattenTuple(FunctionFactory &); -void registerFunctionGrouping(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -173,7 +172,6 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionGetOSKernelVersion(factory); registerFunctionGetTypeSerializationStreams(factory); registerFunctionFlattenTuple(factory); - registerFunctionGrouping(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 70493e5fefc..4f44513a5ea 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -5,6 +6,7 @@ #include #include +#include #include #include @@ -839,89 +841,39 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (node.name == "grouping") { - auto arguments_column_name = data.getUniqueName("__grouping_args"); + ColumnNumbers arguments_indexes; + auto aggregation_keys_number = data.aggregation_keys.size(); + for (auto const & arg : node.arguments->children) { - if (!data.hasColumn("__grouping_set_map")) - { - ColumnWithTypeAndName column; - column.name = "__grouping_set_map"; - switch (data.group_by_kind) - { - case GroupByKind::GROUPING_SETS: - { - size_t map_size = data.aggregation_keys.size() + 1; - column.type = std::make_shared(std::make_shared(map_size)); - Array maps_per_set; - for (auto & grouping_set : data.grouping_set_keys) - { - std::string key_map(map_size, '0'); - for (auto index : grouping_set) - key_map[index] = '1'; - maps_per_set.push_back(key_map); - } - auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); - grouping_set_map_column->insert(maps_per_set); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - break; - } - case GroupByKind::ROLLUP: - case GroupByKind::CUBE: - { - column.type = std::make_shared(std::make_shared()); - auto grouping_set_map_column = ColumnArray::create(ColumnUInt64::create()); - Array kind_and_keys_size; - kind_and_keys_size.push_back(data.group_by_kind == GroupByKind::ROLLUP ? 0 : 1); - kind_and_keys_size.push_back(data.aggregation_keys.size()); - grouping_set_map_column->insert(kind_and_keys_size); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - break; - } - case GroupByKind::ORDINARY: - { - column.type = std::make_shared(); - auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - break; - } - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); - } - - data.addColumn(column); - } - ColumnWithTypeAndName column; - column.name = arguments_column_name; - column.type = std::make_shared(std::make_shared()); - Array arguments_to_keys_map; - for (auto const & arg : node.arguments->children) - { - size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); - arguments_to_keys_map.push_back(pos); - } - auto arguments_column = ColumnArray::create(ColumnUInt64::create()); - arguments_column->insert(Field{arguments_to_keys_map}); - - column.column = ColumnConst::create(ColumnPtr(std::move(arguments_column)), 1); - - data.addColumn(column); + size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); + if (pos == aggregation_keys_number) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of GROUPING function {} is not a part of GROUP BY clause", arg->getColumnName()); + arguments_indexes.push_back(pos); } - if (data.group_by_kind != GroupByKind::ORDINARY) + switch (data.group_by_kind) { - data.addFunction( - FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set", "__grouping_set_map", arguments_column_name }, - column_name - ); - } - else - { - data.addFunction( - FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set_map", arguments_column_name }, - column_name - ); + case GroupByKind::GROUPING_SETS: + { + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.grouping_set_keys)), { "__grouping_set" }, column_name); + break; + } + case GroupByKind::ROLLUP: + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + break; + case GroupByKind::CUBE: + { + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + break; + } + case GroupByKind::ORDINARY: + { + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes))), {}, column_name); + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); } return; } diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference index f08e6d0ea99..dbae7a11f2e 100644 --- a/tests/queries/0_stateless/02293_grouping_function.reference +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -1,27 +1,27 @@ -0 2 -0 2 -0 4 -1 4 -2 4 -3 4 -4 4 -5 4 -6 4 -7 4 -8 4 -9 4 0 1 0 1 -0 4 -1 4 -2 4 -3 4 -4 4 -5 4 -6 4 -7 4 -8 4 -9 4 +0 2 +1 2 +2 2 +3 2 +4 2 +5 2 +6 2 +7 2 +8 2 +9 2 +0 1 +0 2 +0 2 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 0 0 0 1 0 1 @@ -47,26 +47,26 @@ 8 9 0 10 0 -0 1 4 -1 1 4 -2 1 4 -3 1 4 -4 1 4 -5 1 4 -6 1 4 -7 1 4 -8 1 4 -9 1 4 -0 1 6 -1 1 6 -2 1 6 -3 1 6 -4 1 6 -5 1 6 -6 1 6 -7 1 6 -8 1 6 -9 1 6 +0 1 2 +1 1 2 +2 1 2 +3 1 2 +4 1 2 +5 1 2 +6 1 2 +7 1 2 +8 1 2 +9 1 2 +0 1 3 +1 1 3 +2 1 3 +3 1 3 +4 1 3 +5 1 3 +6 1 3 +7 1 3 +8 1 3 +9 1 3 0 1 2 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql index 3555f9dabab..4bbf620a619 100644 --- a/tests/queries/0_stateless/02293_grouping_function.sql +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -7,11 +7,11 @@ GROUP BY (number), (number % 2) ) -ORDER BY number, gr; +ORDER BY number, gr; -- { serverError BAD_ARGUMENTS } SELECT number, - grouping(number, number % 3, number % 2) AS gr + grouping(number, number % 2) AS gr FROM numbers(10) GROUP BY GROUPING SETS ( @@ -22,7 +22,18 @@ ORDER BY number, gr; SELECT number, - grouping(number, number % 2, number % 3) = 2 AS gr + grouping(number % 2, number) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 2) = 1 AS gr FROM numbers(10) GROUP BY GROUPING SETS ( @@ -39,12 +50,12 @@ GROUP BY (number), (number % 2) ) -ORDER BY number, grouping(number, number % 2, number % 3) = 2; +ORDER BY number, grouping(number, number % 2) = 1; SELECT number, count(), - grouping(number, number % 2, number % 3) AS gr + grouping(number, number % 2) AS gr FROM numbers(10) GROUP BY GROUPING SETS ( @@ -62,7 +73,7 @@ GROUP BY (number), (number % 2) ) -HAVING grouping(number, number % 2, number % 3) = 4 +HAVING grouping(number, number % 2) = 2 ORDER BY number SETTINGS enable_optimize_predicate_expression = 0; @@ -74,13 +85,13 @@ GROUP BY (number), (number % 2) ) -HAVING grouping(number, number % 2, number % 3) = 2 +HAVING grouping(number, number % 2) = 1 ORDER BY number SETTINGS enable_optimize_predicate_expression = 0; SELECT number, - GROUPING(number, number % 2, number % 3) = 2 as gr + GROUPING(number, number % 2) = 1 as gr FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY GROUPING SETS ( diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 0285611b9fa..9f73523728d 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -19,47 +19,47 @@ 8 1 1 9 1 1 0 0 -0 4 -0 6 -1 4 -1 6 -2 4 -2 6 -3 4 -3 6 -4 4 -4 6 -5 4 -5 6 -6 4 -6 6 -7 4 -7 6 -8 4 -8 6 -9 4 -9 6 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 0 0 -0 4 -0 6 -1 4 -1 6 -2 4 -2 6 -3 4 -3 6 -4 4 -4 6 -5 4 -5 6 -6 4 -6 6 -7 4 -7 6 -8 4 -8 6 -9 4 -9 6 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 0 0 0 1 0 1 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index 1b0fcdb9289..e9a0338c35a 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -2,6 +2,15 @@ SELECT number, grouping(number, number % 2, number % 3) = 6 FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; -- { serverError BAD_ARGUMENTS } + +SELECT + number, + grouping(number, number % 2) = 3 +FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY number, number % 2 @@ -19,7 +28,7 @@ ORDER BY number; SELECT number, - grouping(number, number % 2, number % 3) AS gr + grouping(number, number % 2) AS gr FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY number, @@ -30,7 +39,7 @@ ORDER BY SELECT number, - grouping(number, number % 2, number % 3) AS gr + grouping(number, number % 2) AS gr FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY ROLLUP(number, number % 2) From d4c66f4a4859fae200d740567572b11172b96a6d Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 19 May 2022 16:36:51 +0000 Subject: [PATCH 08/24] Code cleanup & fix GROUPING() with TOTALS --- src/Functions/grouping.h | 10 ++-- src/Interpreters/ActionsVisitor.cpp | 7 +++ src/Interpreters/InterpreterSelectQuery.cpp | 7 ++- src/Parsers/ASTSelectQuery.h | 34 +++++++------ src/Processors/QueryPlan/AggregatingStep.cpp | 2 +- src/Processors/QueryPlan/CubeStep.cpp | 23 ++++++++- src/Processors/QueryPlan/CubeStep.h | 1 + src/Processors/QueryPlan/RollupStep.cpp | 7 ++- src/Processors/QueryPlan/RollupStep.h | 1 + ...02293_grouping_function_group_by.reference | 48 +++++++++++++++++++ .../02293_grouping_function_group_by.sql | 38 +++++++++++++++ 11 files changed, 146 insertions(+), 32 deletions(-) diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index a881616812b..934be18345d 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -1,14 +1,14 @@ -#include +#pragma once + #include #include #include #include +#include +#include #include #include -#include -#include "Core/ColumnNumbers.h" -#include "DataTypes/Serializations/ISerialization.h" -#include "base/types.h" +#include namespace DB { diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 4f44513a5ea..d22989219a4 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -65,6 +65,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int DUPLICATE_COLUMN; extern const int LOGICAL_ERROR; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -841,6 +843,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (node.name == "grouping") { + size_t arguments_size = node.arguments->children.size(); + if (arguments_size == 0) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function GROUPING expects at least one argument"); + if (arguments_size > 64) + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size); ColumnNumbers arguments_indexes; auto aggregation_keys_number = data.aggregation_keys.size(); for (auto const & arg : node.arguments->children) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bfadc66352..3b438ef9863 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1095,6 +1095,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

desc->type == ProjectionDescription::Type::Aggregate) { query_info.projection->aggregate_overflow_row = aggregate_overflow_row; @@ -1387,11 +1390,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs index; - index.reserve(output_header.columns() + 2); + index.reserve(output_header.columns() + 1); auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); const auto * grouping_node = &dag->addColumn( diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 43a6491157a..91c85a08412 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,7 +1,9 @@ #include #include +#include #include #include +#include namespace DB { @@ -24,6 +26,7 @@ static ITransformingStep::Traits getTraits() CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) + , keys_size(params_->params.keys_size) , params(std::move(params_)) { /// Aggregation keys are distinct @@ -31,14 +34,30 @@ CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsP output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); } -void CubeStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) +{ + auto dag = std::make_shared(header.getColumnsWithTypeAndName()); + + auto grouping_col = ColumnUInt64::create(1, grouping_set_number); + const auto * grouping_node = &dag->addColumn( + {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); + + grouping_node = &dag->materializeNode(*grouping_node); + auto & index = dag->getIndex(); + index.insert(index.begin(), grouping_node); + + auto expression = std::make_shared(dag, settings.getActionsSettings()); + return std::make_shared(header, expression); +} + +void CubeStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { pipeline.resize(1); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return nullptr; + return addGroupingSetForTotals(header, settings, (UInt64(1) << keys_size) - 1); return std::make_shared(header, std::move(params)); }); diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index 1079bed5398..d3e26f9379f 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -21,6 +21,7 @@ public: const Aggregator::Params & getParams() const; private: + size_t keys_size; AggregatingTransformParamsPtr params; }; diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index 2961ef5ddbd..3b061f9c246 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -25,20 +25,23 @@ static ITransformingStep::Traits getTraits() RollupStep::RollupStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) , params(std::move(params_)) + , keys_size(params->params.keys_size) { /// Aggregation keys are distinct for (auto key : params->params.keys) output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); } -void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number); + +void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { pipeline.resize(1); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return nullptr; + return addGroupingSetForTotals(header, settings, keys_size); return std::make_shared(header, std::move(params)); }); diff --git a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h index 7cd71fecdc1..3dce6f74d9f 100644 --- a/src/Processors/QueryPlan/RollupStep.h +++ b/src/Processors/QueryPlan/RollupStep.h @@ -20,6 +20,7 @@ public: private: AggregatingTransformParamsPtr params; + size_t keys_size; }; } diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 9f73523728d..021083db6eb 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -126,3 +126,51 @@ 8 6 9 5 9 6 +0 0 +0 1 +0 1 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 + +0 0 +0 0 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 + +0 0 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index e9a0338c35a..b30080b88af 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -75,3 +75,41 @@ GROUP BY HAVING grouping(number) != 0 ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +ORDER BY + number, gr; From 37b66c8a9e70cab6b04ab87d4366ee0d42858a7f Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 23 May 2022 12:48:48 +0000 Subject: [PATCH 09/24] Check format name on storage creation --- src/Formats/FormatFactory.cpp | 7 +++++++ src/Formats/FormatFactory.h | 3 +++ src/Storages/HDFS/StorageHDFS.cpp | 1 + src/Storages/StorageFile.cpp | 2 ++ src/Storages/StorageS3.cpp | 1 + src/Storages/StorageURL.cpp | 1 + .../02311_create_table_with_unknown_format.reference | 0 .../0_stateless/02311_create_table_with_unknown_format.sql | 4 ++++ 8 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/02311_create_table_with_unknown_format.reference create mode 100644 tests/queries/0_stateless/02311_create_table_with_unknown_format.sql diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 4c1b23a75ab..33fd68e67f7 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -585,6 +585,13 @@ bool FormatFactory::checkIfFormatHasAnySchemaReader(const String & name) return checkIfFormatHasSchemaReader(name) || checkIfFormatHasExternalSchemaReader(name); } +void FormatFactory::checkFormatName(const String & name) const +{ + auto it = dict.find(name); + if (it == dict.end()) + throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT); +} + FormatFactory & FormatFactory::instance() { static FormatFactory ret; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index f7d3c23d3b4..0431d1ef8c9 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -210,6 +210,9 @@ public: bool isInputFormat(const String & name) const; bool isOutputFormat(const String & name) const; + /// Check that format with specified name exists and throw an exception otherwise. + void checkFormatName(const String & name) const; + private: FormatsDictionary dict; FileExtensionFormats file_extension_formats; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index d114bb67016..a90db60e341 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -146,6 +146,7 @@ StorageHDFS::StorageHDFS( , distributed_processing(distributed_processing_) , partition_by(partition_by_) { + FormatFactory::instance().checkFormatName(format_name); context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); checkHDFSURL(uri_); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 47e32337dfe..a3a1d061ecc 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -382,6 +382,8 @@ StorageFile::StorageFile(CommonArguments args) , compression_method(args.compression_method) , base_path(args.getContext()->getPath()) { + if (format_name != "Distributed") + FormatFactory::instance().checkFormatName(format_name); } void StorageFile::setStorageMetadata(CommonArguments args) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index d402dce5ede..d7e3d5e5374 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -609,6 +609,7 @@ StorageS3::StorageS3( , partition_by(partition_by_) , is_key_with_globs(uri_.key.find_first_of("*?{") != std::string::npos) { + FormatFactory::instance().checkFormatName(format_name); context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri_.uri); StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0db4fa75aba..1961711785d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -74,6 +74,7 @@ IStorageURLBase::IStorageURLBase( , http_method(http_method_) , partition_by(partition_by_) { + FormatFactory::instance().checkFormatName(format_name); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.reference b/tests/queries/0_stateless/02311_create_table_with_unknown_format.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql new file mode 100644 index 00000000000..5f43ecd1c65 --- /dev/null +++ b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql @@ -0,0 +1,4 @@ +create table test_02311 (x UInt32) engine=File(UnknownFormat); -- {serverError UNKNOWN_FORMAT} +create table test_02311 (x UInt32) engine=URL('http://some/url', UnknownFormat); -- {serverError UNKNOWN_FORMAT} +create table test_02311 (x UInt32) engine=S3('http://host:2020/test/data', UnknownFormat); -- {serverError UNKNOWN_FORMAT} +create table test_02311 (x UInt32) engine=HDFS('http://hdfs:9000/data', UnknownFormat); -- {serverError UNKNOWN_FORMAT} From b73d49158d73245e8ee190dfefad196b017f9bcb Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 23 May 2022 17:01:45 +0200 Subject: [PATCH 10/24] Fix test --- .../0_stateless/02311_create_table_with_unknown_format.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql index 5f43ecd1c65..d046ffebca2 100644 --- a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql +++ b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + create table test_02311 (x UInt32) engine=File(UnknownFormat); -- {serverError UNKNOWN_FORMAT} create table test_02311 (x UInt32) engine=URL('http://some/url', UnknownFormat); -- {serverError UNKNOWN_FORMAT} create table test_02311 (x UInt32) engine=S3('http://host:2020/test/data', UnknownFormat); -- {serverError UNKNOWN_FORMAT} From 9518c41dda147023d37c47b220c2650999411bea Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 25 May 2022 09:01:12 +0000 Subject: [PATCH 11/24] Try to fix tests --- .../test_allowed_url_from_config/test.py | 24 +++++++++---------- ...02311_create_table_with_unknown_format.sql | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_allowed_url_from_config/test.py b/tests/integration/test_allowed_url_from_config/test.py index 01a2a500ebf..da9d4404c82 100644 --- a/tests/integration/test_allowed_url_from_config/test.py +++ b/tests/integration/test_allowed_url_from_config/test.py @@ -33,7 +33,7 @@ def start_cluster(): def test_config_with_hosts(start_cluster): assert ( node1.query( - "CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)" + "CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', CSV)" ) == "" ) @@ -44,7 +44,7 @@ def test_config_with_hosts(start_cluster): == "" ) assert "not allowed" in node1.query_and_get_error( - "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)" + "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', CSV)" ) assert "not allowed" in node1.query_and_get_error( "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)" @@ -60,7 +60,7 @@ def test_config_with_only_primary_hosts(start_cluster): ) assert ( node2.query( - "CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)" + "CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', CSV)" ) == "" ) @@ -72,25 +72,25 @@ def test_config_with_only_primary_hosts(start_cluster): ) assert ( node2.query( - "CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)" + "CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', CSV)" ) == "" ) assert "not allowed" in node2.query_and_get_error( - "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)" + "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', CSV)" ) assert "not allowed" in node2.query_and_get_error( "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)" ) assert "not allowed" in node2.query_and_get_error( - "CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)" + "CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', CSV)" ) def test_config_with_only_regexp_hosts(start_cluster): assert ( node3.query( - "CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)" + "CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', CSV)" ) == "" ) @@ -104,7 +104,7 @@ def test_config_with_only_regexp_hosts(start_cluster): "CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)" ) assert "not allowed" in node3.query_and_get_error( - "CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)" + "CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', CSV)" ) @@ -123,7 +123,7 @@ def test_config_without_allowed_hosts_section(start_cluster): ) assert ( node4.query( - "CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)" + "CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', CSV)" ) == "" ) @@ -135,7 +135,7 @@ def test_config_without_allowed_hosts_section(start_cluster): ) assert ( node4.query( - "CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)" + "CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', CSV)" ) == "" ) @@ -149,13 +149,13 @@ def test_config_without_allowed_hosts(start_cluster): "CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)" ) assert "not allowed" in node5.query_and_get_error( - "CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)" + "CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', CSV)" ) assert "not allowed" in node5.query_and_get_error( "CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)" ) assert "not allowed" in node5.query_and_get_error( - "CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)" + "CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', CSV)" ) diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql index d046ffebca2..54e388c3cf0 100644 --- a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql +++ b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, use-hdfs, no-backward-compatibility-check:22.5 create table test_02311 (x UInt32) engine=File(UnknownFormat); -- {serverError UNKNOWN_FORMAT} create table test_02311 (x UInt32) engine=URL('http://some/url', UnknownFormat); -- {serverError UNKNOWN_FORMAT} From 3c1b6609ae76aaba93fbf526b3892cf060698048 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 25 May 2022 21:23:35 +0000 Subject: [PATCH 12/24] Add comments and make tests more verbose --- src/Core/NamesAndTypes.h | 3 +- src/Functions/grouping.h | 12 +++ .../02293_grouping_function.reference | 83 ++++++++++++++++ .../0_stateless/02293_grouping_function.sql | 1 + ...02293_grouping_function_group_by.reference | 97 +++++++++++++++++++ .../02293_grouping_function_group_by.sql | 1 + 6 files changed, 196 insertions(+), 1 deletion(-) diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index c7a51f51816..b9c03aae0ca 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -105,9 +105,10 @@ public: /// Check that column contains in list bool contains(const String & name) const; - /// Try to get column by name, return empty optional if column not found + /// Try to get column by name, returns empty optional if column not found std::optional tryGetByName(const std::string & name) const; + /// Try to get column position by name, returns number of columns if column isn't found size_t getPosByName(const std::string & name) const noexcept; }; diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index 934be18345d..7c8970667da 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -93,6 +93,12 @@ public: return FunctionGroupingBase::executeImpl(arguments, input_rows_count, [this](UInt64 set_index, UInt64 arg_index) { + // For ROLLUP(a, b, c) there will be following grouping set indexes: + // | GROUPING SET | INDEX | + // | (a, b, c) | 0 | + // | (a, b) | 1 | + // | (a) | 2 | + // | () | 3 | return arg_index < aggregation_keys_number - set_index; } ); @@ -117,6 +123,12 @@ public: return FunctionGroupingBase::executeImpl(arguments, input_rows_count, [this](UInt64 set_index, UInt64 arg_index) { + // For CUBE(a, b) there will be following grouping set indexes: + // | GROUPING SET | INDEX | + // | (a, b) | 0 | + // | (a) | 1 | + // | (b) | 2 | + // | () | 3 | auto set_mask = (ONE << aggregation_keys_number) - 1 - set_index; return set_mask & (ONE << (aggregation_keys_number - arg_index - 1)); } diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference index dbae7a11f2e..e71d6812ab5 100644 --- a/tests/queries/0_stateless/02293_grouping_function.reference +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -1,3 +1,14 @@ +-- { echoOn } +SELECT + number, + grouping(number, number % 2) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; 0 1 0 1 0 2 @@ -10,6 +21,16 @@ 7 2 8 2 9 2 +SELECT + number, + grouping(number % 2, number) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; 0 1 0 2 0 2 @@ -22,6 +43,16 @@ 7 1 8 1 9 1 +SELECT + number, + grouping(number, number % 2) = 1 AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; 0 0 0 1 0 1 @@ -34,6 +65,15 @@ 7 0 8 0 9 0 +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, grouping(number, number % 2) = 1; 0 0 0 @@ -46,6 +86,18 @@ 7 8 9 +SELECT + number, + count(), + grouping(number, number % 2) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number, number % 2), + () + ) +ORDER BY (gr, number); 0 10 0 0 1 2 1 1 2 @@ -67,6 +119,17 @@ 7 1 3 8 1 3 9 1 3 +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2) = 2 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; 0 1 2 @@ -77,8 +140,28 @@ 7 8 9 +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2) = 1 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; 0 0 +SELECT + number, + GROUPING(number, number % 2) = 1 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2)) +ORDER BY number, gr; 0 0 0 1 0 1 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql index 4bbf620a619..169fc09c324 100644 --- a/tests/queries/0_stateless/02293_grouping_function.sql +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -9,6 +9,7 @@ GROUP BY ) ORDER BY number, gr; -- { serverError BAD_ARGUMENTS } +-- { echoOn } SELECT number, grouping(number, number % 2) AS gr diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 021083db6eb..7f87aecd4bd 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -1,3 +1,12 @@ +-- { echoOn } +SELECT + number, + grouping(number, number % 2) = 3 +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; 0 1 1 1 2 1 @@ -8,6 +17,15 @@ 7 1 8 1 9 1 +SELECT + number, + grouping(number), + GROUPING(number % 2) +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; 0 1 1 1 1 1 2 1 1 @@ -18,6 +36,16 @@ 7 1 1 8 1 1 9 1 1 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH ROLLUP +ORDER BY + number, gr; 0 0 0 2 0 3 @@ -39,6 +67,14 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) +ORDER BY + number, gr; 0 0 0 2 0 3 @@ -60,6 +96,16 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH CUBE +ORDER BY + number, gr; 0 0 0 1 0 1 @@ -83,6 +129,14 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +ORDER BY + number, gr; 0 0 0 1 0 1 @@ -106,6 +160,15 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) + 3 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +HAVING grouping(number) != 0 +ORDER BY + number, gr; 0 5 0 6 1 5 @@ -126,6 +189,23 @@ 8 6 9 5 9 6 +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +ORDER BY + number, gr; 0 0 0 1 0 1 @@ -151,6 +231,23 @@ 9 3 0 0 +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +ORDER BY + number, gr; 0 0 0 2 0 3 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index b30080b88af..9bf9d43478b 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -7,6 +7,7 @@ GROUP BY number % 2 ORDER BY number; -- { serverError BAD_ARGUMENTS } +-- { echoOn } SELECT number, grouping(number, number % 2) = 3 From 7cd7782e4f25b0a2aad9cfa69cd9ffcb4da1e6cc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 25 May 2022 21:55:41 +0000 Subject: [PATCH 13/24] Process columns more efficiently in GROUPING() --- src/Functions/grouping.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index 7c8970667da..a49e946b2cb 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -43,18 +43,20 @@ public: template ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count, AggregationKeyChecker checker) const { - auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + const auto * grouping_set_column = checkAndGetColumn(arguments[0].column.get()); - auto result = std::make_shared()->createColumn(); + auto result = ColumnUInt64::create(); + auto & result_data = result->getData(); + result_data.reserve(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { - UInt64 set_index = grouping_set_column->get64(i); + UInt64 set_index = grouping_set_column->getElement(i); UInt64 value = 0; for (auto index : arguments_indexes) value = (value << 1) + (checker(set_index, index) ? 1 : 0); - result->insert(Field(value)); + result_data.push_back(value); } return result; } From 16c6b6070344783c9fcbc935fc572da3207b07ec Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 25 May 2022 23:22:29 +0000 Subject: [PATCH 14/24] Introduce AggregationKeysInfo --- src/Interpreters/ActionsVisitor.cpp | 26 ++++++++--------- src/Interpreters/ActionsVisitor.h | 37 ++++++++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 12 ++------ src/Interpreters/ExpressionAnalyzer.h | 5 ++++ src/Storages/VirtualColumnUtils.cpp | 7 ++++- 5 files changed, 56 insertions(+), 31 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index d22989219a4..99d2217cba3 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -473,9 +473,7 @@ ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, - const NamesAndTypesList & aggregation_keys_, - const ColumnNumbersList & grouping_set_keys_, + std::reference_wrapper source_columns_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, @@ -483,22 +481,20 @@ ActionsMatcher::Data::Data( bool no_makeset_, bool only_consts_, bool create_source_for_in_, - GroupByKind group_by_kind_) + AggregationKeysInfo aggregation_keys_info_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) , source_columns(source_columns_) - , aggregation_keys(aggregation_keys_) - , grouping_set_keys(grouping_set_keys_) , prepared_sets(prepared_sets_) , subqueries_for_sets(subqueries_for_sets_) , no_subqueries(no_subqueries_) , no_makeset(no_makeset_) , only_consts(only_consts_) , create_source_for_in(create_source_for_in_) - , group_by_kind(group_by_kind_) , visit_depth(0) , actions_stack(std::move(actions_dag), context_) + , aggregation_keys_info(aggregation_keys_info_) , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) { } @@ -848,29 +844,31 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function GROUPING expects at least one argument"); if (arguments_size > 64) throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size); + auto keys_info = data.aggregation_keys_info; + auto aggregation_keys_number = keys_info.aggregation_keys.size(); + ColumnNumbers arguments_indexes; - auto aggregation_keys_number = data.aggregation_keys.size(); for (auto const & arg : node.arguments->children) { - size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); + size_t pos = keys_info.aggregation_keys.getPosByName(arg->getColumnName()); if (pos == aggregation_keys_number) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of GROUPING function {} is not a part of GROUP BY clause", arg->getColumnName()); arguments_indexes.push_back(pos); } - switch (data.group_by_kind) + switch (keys_info.group_by_kind) { case GroupByKind::GROUPING_SETS: { - data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.grouping_set_keys)), { "__grouping_set" }, column_name); + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), keys_info.grouping_set_keys)), { "__grouping_set" }, column_name); break; } case GroupByKind::ROLLUP: - data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name); break; case GroupByKind::CUBE: { - data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name); break; } case GroupByKind::ORDINARY: @@ -880,7 +878,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } default: throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); + "Unexpected kind of GROUP BY clause for GROUPING function: {}", keys_info.group_by_kind); } return; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 5fd228ba836..5a74124192c 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -87,6 +87,33 @@ enum class GroupByKind GROUPING_SETS, }; +/* + * This class stores information about aggregation keys used in GROUP BY clause. + * It's used for providing information about aggregation to GROUPING function + * implementation. +*/ +struct AggregationKeysInfo +{ + AggregationKeysInfo( + std::reference_wrapper aggregation_keys_, + std::reference_wrapper grouping_set_keys_, + GroupByKind group_by_kind_) + : aggregation_keys(aggregation_keys_) + , grouping_set_keys(grouping_set_keys_) + , group_by_kind(group_by_kind_) + {} + + AggregationKeysInfo(const AggregationKeysInfo &) = default; + AggregationKeysInfo(AggregationKeysInfo &&) = default; + + // Names and types of all used keys + const NamesAndTypesList & aggregation_keys; + // Indexes of aggregation keys used in each grouping set (only for GROUP BY GROUPING SETS) + const ColumnNumbersList & grouping_set_keys; + + GroupByKind group_by_kind; +}; + /// Collect ExpressionAction from AST. Returns PreparedSets and SubqueriesForSets too. class ActionsMatcher { @@ -98,17 +125,15 @@ public: SizeLimits set_size_limit; size_t subquery_depth; const NamesAndTypesList & source_columns; - const NamesAndTypesList & aggregation_keys; - const ColumnNumbersList & grouping_set_keys; PreparedSets & prepared_sets; SubqueriesForSets & subqueries_for_sets; bool no_subqueries; bool no_makeset; bool only_consts; bool create_source_for_in; - GroupByKind group_by_kind; size_t visit_depth; ScopeStack actions_stack; + AggregationKeysInfo aggregation_keys_info; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -121,9 +146,7 @@ public: ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, - const NamesAndTypesList & aggregation_keys_, - const ColumnNumbersList & grouping_set_keys_, + std::reference_wrapper source_columns_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, @@ -131,7 +154,7 @@ public: bool no_makeset_, bool only_consts_, bool create_source_for_in_, - GroupByKind group_by_kind_); + AggregationKeysInfo aggregation_keys_info_); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3931180d941..699f73abd67 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -603,8 +603,6 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ settings.size_limits_for_set, subquery_depth, sourceColumns(), - aggregation_keys, - aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -612,7 +610,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ false /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - group_by_kind); + getAggregationKeysInfo()); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -626,8 +624,6 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP settings.size_limits_for_set, subquery_depth, sourceColumns(), - aggregation_keys, - aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -635,7 +631,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - group_by_kind); + getAggregationKeysInfo()); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -650,8 +646,6 @@ void ExpressionAnalyzer::getRootActionsForHaving( settings.size_limits_for_set, subquery_depth, sourceColumns(), - aggregation_keys, - aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -659,7 +653,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( false /* no_makeset */, only_consts, true /* create_source_for_in */, - group_by_kind); + getAggregationKeysInfo()); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ecac8cba771..971fe753978 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -205,6 +205,11 @@ protected: NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); NamesAndTypesList analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); + + AggregationKeysInfo getAggregationKeysInfo() const noexcept + { + return { aggregation_keys, aggregation_keys_indexes_list, group_by_kind }; + } }; class SelectQueryExpressionAnalyzer; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index dd6c30e3c79..40cf650f690 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -156,8 +156,13 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block auto actions = std::make_shared(block.getColumnsWithTypeAndName()); PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; + const NamesAndTypesList source_columns; + const NamesAndTypesList aggregation_keys; + const ColumnNumbersList grouping_set_keys; + ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, GroupByKind::NONE); + context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, + { aggregation_keys, grouping_set_keys, GroupByKind::NONE }); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); From abc90fad8dba3521a29d044cadad599d9336dcec Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 27 May 2022 12:42:51 +0000 Subject: [PATCH 15/24] fix WITH FILL with negative itervals --- src/Processors/Transforms/FillingTransform.cpp | 4 ++-- tests/queries/0_stateless/02112_with_fill_interval.reference | 3 +++ tests/queries/0_stateless/02112_with_fill_interval.sql | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 9e5d57a2b43..a41b5660e0d 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -90,9 +90,9 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & if (which.isDate() || which.isDate32()) { Int64 avg_seconds = get(descr.fill_step) * descr.step_kind->toAvgSeconds(); - if (avg_seconds < 86400) + if (std::abs(avg_seconds) < 86400) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, - "Value of step is to low ({} seconds). Must be >= 1 day", avg_seconds); + "Value of step is to low ({} seconds). Must be >= 1 day", std::abs(avg_seconds)); } if (which.isDate()) diff --git a/tests/queries/0_stateless/02112_with_fill_interval.reference b/tests/queries/0_stateless/02112_with_fill_interval.reference index fc6f9378bfa..4bb99803eb1 100644 --- a/tests/queries/0_stateless/02112_with_fill_interval.reference +++ b/tests/queries/0_stateless/02112_with_fill_interval.reference @@ -107,3 +107,6 @@ 2020-05-01 2 0 2020-05-01 3 0 2020-05-01 4 0 +1970-01-04 +1970-01-03 +1970-01-02 diff --git a/tests/queries/0_stateless/02112_with_fill_interval.sql b/tests/queries/0_stateless/02112_with_fill_interval.sql index f26ec7da8c9..d2416f9a84b 100644 --- a/tests/queries/0_stateless/02112_with_fill_interval.sql +++ b/tests/queries/0_stateless/02112_with_fill_interval.sql @@ -79,3 +79,6 @@ d WITH FILL id WITH FILL FROM 1 TO 5; DROP TABLE with_fill_date; + +SELECT d FROM (SELECT toDate(1) AS d) +ORDER BY d DESC WITH FILL FROM toDate(3) TO toDate(0) STEP INTERVAL -1 DAY; From d2cdbf3956a9ff1f62a98a8c0b7c3cf1c7c29b66 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 29 May 2022 14:09:49 +0200 Subject: [PATCH 16/24] Fix refactoring issue --- src/Storages/MergeTree/MergeTreeData.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 16699260ea0..7726a752cbe 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3079,7 +3079,9 @@ void MergeTreeData::forgetPartAndMoveToDetached(const MergeTreeData::DataPartPtr throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART); /// What if part_to_detach is a reference to *it_part? Make a new owner just in case. - const DataPartPtr & part = *it_part; + /// Important to own part pointer here (not const reference), because it will be removed from data_parts_indexes + /// few lines below. + DataPartPtr part = *it_part; // NOLINT if (part->getState() == DataPartState::Active) { From c14b547f00a62bd050f2675a3a20df66cc9ba20a Mon Sep 17 00:00:00 2001 From: qinghuan wang <1095193290@qq.com> Date: Sun, 29 May 2022 20:19:23 +0800 Subject: [PATCH 17/24] Update gui.md --- docs/en/interfaces/third-party/gui.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 92d00f2812c..65396d28b40 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -147,6 +147,16 @@ Features: [Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a [Zeppelin](https://zeppelin.apache.org) interpreter for ClickHouse. Compared with JDBC interpreter, it can provide better timeout control for long running queries. +### ClickCat {#clickcat} + +[ClickCat](http://8.135.49.240:8080/dashboard) is a firendly user interface that lets you search,explore and visualize your ClickHouse Data. + +Features: + +- An online SQL editor which can run your SQL code without any installing. +- You can observe all processes and mutations. For those unfinished processes, you can kill them in ui. +- The Metrics contains Cluster Analysis,Data Analysis,Query Analysis. + ## Commercial {#commercial} ### DataGrip {#datagrip} From 746ff42239bc497cab25dece8d775f15cfb14403 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 May 2022 01:13:03 +0300 Subject: [PATCH 18/24] Update gui.md --- docs/en/interfaces/third-party/gui.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 65396d28b40..27e18a2d858 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -149,7 +149,7 @@ Features: ### ClickCat {#clickcat} -[ClickCat](http://8.135.49.240:8080/dashboard) is a firendly user interface that lets you search,explore and visualize your ClickHouse Data. +[ClickCat](http://8.135.49.240:8080/dashboard) is a firendly user interface that lets you search, explore and visualize your ClickHouse Data. Features: From ded9f5675ded1f4bf0a7bda75086af5353fd37bd Mon Sep 17 00:00:00 2001 From: qinghuan wang <1095193290@qq.com> Date: Mon, 30 May 2022 09:22:06 +0800 Subject: [PATCH 19/24] Update gui.md --- docs/en/interfaces/third-party/gui.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 27e18a2d858..210c60cc71f 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -149,7 +149,7 @@ Features: ### ClickCat {#clickcat} -[ClickCat](http://8.135.49.240:8080/dashboard) is a firendly user interface that lets you search, explore and visualize your ClickHouse Data. +[ClickCat](https://github.com/open-botech/ClickCat) is a firendly user interface that lets you search, explore and visualize your ClickHouse Data. Features: From 9ffa5f5e0d39ab7df981cf0fcc2952fedf53f60f Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 30 May 2022 08:10:16 +0000 Subject: [PATCH 20/24] fix typo --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 75a30a2d792..b79dba69e9c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -567,7 +567,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ - M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ + M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows to bypass cache for some queries if intended", 0) \ M(UInt64, filesystem_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \ M(Bool, enable_filesystem_cache_log, false, "Allows to record the filesystem caching log for each query", 0) \ From 78bd47d8df8fcc2344a01c4cf1d59beef13ab2d3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 30 May 2022 11:32:14 +0300 Subject: [PATCH 21/24] Fix excessive LIST requests to coordinator for transactions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In [1] there was only few transactions, but lots of List for /test/clickhouse/txn/log: $ clickhouse-local --format TSVWithNamesAndTypes --file zookeeper_log.tsv.gz -q "select * except('path|session_id|event_time|thread_id|event_date|xid') apply(x->groupUniqArray(x)), path, session_id, min(event_time), max(event_time), count() from table where has_watch and type = 'Request' group by path, session_id order by count() desc limit 1 format Vertical" Row 1: ────── groupUniqArray(type): ['Request'] groupUniqArray(query_id): ['','62d75128-9031-48a5-87ba-aec3f0b591c6'] groupUniqArray(address): ['::1'] groupUniqArray(port): [9181] groupUniqArray(has_watch): [1] groupUniqArray(op_num): ['List'] groupUniqArray(data): [''] groupUniqArray(is_ephemeral): [0] groupUniqArray(is_sequential): [0] groupUniqArray(version): [] groupUniqArray(requests_size): [0] groupUniqArray(request_idx): [0] groupUniqArray(error): [] groupUniqArray(watch_type): [] groupUniqArray(watch_state): [] groupUniqArray(stat_version): [0] groupUniqArray(stat_cversion): [0] groupUniqArray(stat_dataLength): [0] groupUniqArray(stat_numChildren): [0] groupUniqArray(children): [[]] path: /test/clickhouse/txn/log session_id: 1 min(event_time): 2022-05-27 12:54:09.025897 max(event_time): 2022-05-27 13:37:12.846314