Fix distributed GROUPING SETS and GROUPING function

This commit is contained in:
Dmitry Novik 2023-03-09 18:00:23 +00:00
parent e2bb5f12a1
commit a305c6e7ab
10 changed files with 88 additions and 2 deletions

View File

@ -65,6 +65,11 @@ void FunctionNode::resolveAsFunction(FunctionBasePtr function_value)
kind = FunctionKind::ORDINARY;
}
/// Replaces the stored function name with `name`.
/// Only `function_name` is assigned here; nothing else on the node is updated by this method.
/// `name` is taken by value so that it can be moved into the member.
void FunctionNode::resolveAsFunctionWithName(String name)
{
function_name = std::move(name);
}
void FunctionNode::resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value)
{
function_name = aggregate_function_value->getName();

View File

@ -163,6 +163,8 @@ public:
*/
void resolveAsFunction(FunctionBasePtr function_value);
/// Set the function name of this node to `name`.
void resolveAsFunctionWithName(String name);
void resolveAsFunction(const FunctionOverloadResolverPtr & resolver)
{
resolveAsFunction(resolver->build(getArgumentColumns()));

View File

@ -14,6 +14,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NOT_AN_AGGREGATE;
extern const int NOT_IMPLEMENTED;
}
@ -55,7 +56,7 @@ public:
}
if (!found_argument_in_group_by_keys)
throw Exception(ErrorCodes::NOT_AN_AGGREGATE,
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"GROUPING function argument {} is not in GROUP BY keys. In query {}",
grouping_function_arguments_node->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());

View File

@ -0,0 +1,57 @@
#include <Analyzer/Visitors/GeneralizeGroupingFunctionForDistributedVisitor.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/** Turns GROUPING function specializations back into a plain GROUPING call.
  *
  * Function nodes named groupingOrdinary, groupingForRollup, groupingForCube and
  * groupingForGroupingSets are renamed to GROUPING. For every specialization except
  * groupingOrdinary the leading `__grouping_set` column argument is removed as well.
  * Any other node is left untouched.
  */
class GeneralizeGroupingFunctionForDistributedVisitor : public InDepthQueryTreeVisitor<GeneralizeGroupingFunctionForDistributedVisitor>
{
public:
    static void visitImpl(QueryTreeNodePtr & node)
    {
        auto * function_node = node->as<FunctionNode>();
        if (function_node == nullptr)
            return;

        /// Both flags are evaluated before the rename below, so they describe the original name.
        const auto & current_name = function_node->getFunctionName();
        const bool is_ordinary = current_name == "groupingOrdinary";
        const bool is_specialization = is_ordinary
            || current_name == "groupingForRollup"
            || current_name == "groupingForCube"
            || current_name == "groupingForGroupingSets";

        if (!is_specialization)
            return;

        function_node->resolveAsFunctionWithName("GROUPING");

        /// groupingOrdinary keeps its argument list as is.
        if (!is_ordinary)
            removeGroupingSetArgument(*function_node);
    }

private:
    /// Erases the first argument of `function_node`, which must be the `__grouping_set` column.
    /// Throws LOGICAL_ERROR if there are no arguments or the first one is something else.
    static void removeGroupingSetArgument(FunctionNode & function_node)
    {
        auto & argument_nodes = function_node.getArguments().getNodes();

        if (argument_nodes.empty())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Grouping function specialization must have arguments");

        auto * first_column = argument_nodes[0]->as<ColumnNode>();
        const bool is_grouping_set_column = first_column && first_column->getColumnName() == "__grouping_set";

        if (!is_grouping_set_column)
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                "The first argument of Grouping function specialization must be '__grouping_set' column but {} found",
                argument_nodes[0]->dumpTree());

        argument_nodes.erase(argument_nodes.begin());
    }
};
/// Runs GeneralizeGroupingFunctionForDistributedVisitor over the query tree rooted at `node`.
void removeGroupingFunctionSpecializations(QueryTreeNodePtr & node)
{
    GeneralizeGroupingFunctionForDistributedVisitor{}.visit(node);
}
}

View File

@ -0,0 +1,10 @@
#pragma once
#include <Analyzer/IQueryTreeNode.h>
namespace DB
{
/// Visits the query tree rooted at `node` and renames the function nodes groupingOrdinary,
/// groupingForRollup, groupingForCube and groupingForGroupingSets to GROUPING.
/// For all of them except groupingOrdinary the leading `__grouping_set` argument is dropped too.
/// The tree is modified in place.
void removeGroupingFunctionSpecializations(QueryTreeNodePtr & node);
}

View File

@ -235,6 +235,7 @@ add_object_library(clickhouse_databases_mysql Databases/MySQL)
add_object_library(clickhouse_disks Disks)
add_object_library(clickhouse_analyzer Analyzer)
add_object_library(clickhouse_analyzer_passes Analyzer/Passes)
add_object_library(clickhouse_analyzer_visitors Analyzer/Visitors)
add_object_library(clickhouse_planner Planner)
add_object_library(clickhouse_interpreters Interpreters)
add_object_library(clickhouse_interpreters_cache Interpreters/Cache)

View File

@ -390,7 +390,11 @@ void addMergingAggregatedStep(QueryPlan & query_plan,
* but it can work more slowly.
*/
Aggregator::Params params(aggregation_analysis_result.aggregation_keys,
auto keys = aggregation_analysis_result.aggregation_keys;
if (!aggregation_analysis_result.grouping_sets_parameters_list.empty())
keys.insert(keys.begin(), "__grouping_set");
Aggregator::Params params(keys,
aggregation_analysis_result.aggregate_descriptions,
query_analysis_result.aggregate_overflow_row,
settings.max_threads,

View File

@ -44,6 +44,7 @@
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Visitors/GeneralizeGroupingFunctionForDistributedVisitor.h>
#include <Planner/Planner.h>
#include <Planner/Utils.h>
@ -686,6 +687,7 @@ void StorageDistributed::read(
storage_snapshot,
remote_storage_id,
remote_table_function_ptr);
removeGroupingFunctionSpecializations(query_tree_with_replaced_distributed_table);
query_ast = queryNodeToSelectQuery(query_tree_with_replaced_distributed_table);

View File

@ -1,3 +1,5 @@
set optimize_group_by_function_keys=0;
SELECT
number,
grouping(number, number % 2, number % 3) AS gr

View File

@ -1,3 +1,5 @@
set optimize_group_by_function_keys=0;
SELECT
number,
grouping(number, number % 2, number % 3) = 6