Support ordinary GROUP BY

This commit is contained in:
Dmitry Novik 2022-05-13 23:04:12 +00:00
parent efb30bdf64
commit 6fc7dfea80
11 changed files with 123 additions and 49 deletions

View File

@ -45,8 +45,31 @@ public:
return std::make_shared<DataTypeUInt64>();
}
/// Computes the GROUPING value for the two-argument form of the function
/// (the form that has no `__grouping_set` column).
///
/// arguments[0] is read as a constant UInt64 column holding the number of aggregation keys.
/// arguments[1] is read as a constant Array column holding one index per GROUPING argument.
/// One bit is appended per index, the first index ending up as the most significant bit:
/// 1 when the index is less than the number of aggregation keys, 0 otherwise.
///
/// Both inputs are constant columns, so the bitmask is identical for every row. It is
/// computed once and the result column is filled with `input_rows_count` copies of it.
///
/// NOTE(review): the results of checkAndGetColumnConst are dereferenced without a null
/// check (unchanged from the previous implementation) — this assumes the caller always
/// passes constant columns of exactly these types; confirm at the call site.
ColumnPtr executeSingleGroupingSet(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
{
    const auto * grouping_set_map_column = checkAndGetColumnConst<ColumnUInt64>(arguments[0].column.get());
    const auto * argument_keys_column = checkAndGetColumnConst<ColumnArray>(arguments[1].column.get());

    const auto aggregation_keys_number = (*grouping_set_map_column)[0].get<UInt64>();

    /// Hold the Field in a named variable: get<Array>() returns a reference into it,
    /// so binding that reference to a temporary Field would dangle.
    const Field argument_keys = (*argument_keys_column)[0];
    const auto & indexes = argument_keys.get<Array>();

    UInt64 value = 0;
    for (const auto & index : indexes)
        value = (value << 1) + (index.get<UInt64>() < aggregation_keys_number ? 1 : 0);

    return ColumnUInt64::create(input_rows_count, value);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (arguments.size() == 2)
return executeSingleGroupingSet(arguments, input_rows_count);
auto grouping_set_column = checkAndGetColumn<ColumnUInt64>(arguments[0].column.get());
auto grouping_set_map_column = checkAndGetColumnConst<ColumnArray>(arguments[1].column.get());
auto argument_keys_column = checkAndGetColumnConst<ColumnArray>(arguments[2].column.get());

View File

@ -477,7 +477,8 @@ ActionsMatcher::Data::Data(
bool no_subqueries_,
bool no_makeset_,
bool only_consts_,
bool create_source_for_in_)
bool create_source_for_in_,
bool has_grouping_set_column_)
: WithContext(context_)
, set_size_limit(set_size_limit_)
, subquery_depth(subquery_depth_)
@ -490,6 +491,7 @@ ActionsMatcher::Data::Data(
, no_makeset(no_makeset_)
, only_consts(only_consts_)
, create_source_for_in(create_source_for_in_)
, has_grouping_set_column(has_grouping_set_column_)
, visit_depth(0)
, actions_stack(std::move(actions_dag), context_)
, next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1)
@ -842,19 +844,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
{
ColumnWithTypeAndName column;
column.name = "__grouping_set_map";
size_t map_size = data.aggregation_keys.size() + 1;
column.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeFixedString>(map_size));
Array maps_per_set;
for (auto & grouping_set : data.grouping_set_keys)
if (data.has_grouping_set_column)
{
std::string key_map(map_size, '0');
for (auto index : grouping_set)
key_map[index] = '1';
maps_per_set.push_back(key_map);
size_t map_size = data.aggregation_keys.size() + 1;
column.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeFixedString>(map_size));
Array maps_per_set;
for (auto & grouping_set : data.grouping_set_keys)
{
std::string key_map(map_size, '0');
for (auto index : grouping_set)
key_map[index] = '1';
maps_per_set.push_back(key_map);
}
auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size));
grouping_set_map_column->insert(maps_per_set);
column.column = ColumnConst::create(std::move(grouping_set_map_column), 1);
}
else
{
column.type = std::make_shared<DataTypeUInt64>();
auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size());
column.column = ColumnConst::create(std::move(grouping_set_map_column), 1);
}
auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size));
grouping_set_map_column->insert(maps_per_set);
column.column = ColumnConst::create(std::move(grouping_set_map_column), 1);
data.addColumn(column);
}
@ -875,11 +886,22 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
data.addColumn(column);
}
data.addFunction(
FunctionFactory::instance().get("grouping", data.getContext()),
{ "__grouping_set", "__grouping_set_map", arguments_column_name },
column_name
);
if (data.has_grouping_set_column)
{
data.addFunction(
FunctionFactory::instance().get("grouping", data.getContext()),
{ "__grouping_set", "__grouping_set_map", arguments_column_name },
column_name
);
}
else
{
data.addFunction(
FunctionFactory::instance().get("grouping", data.getContext()),
{ "__grouping_set_map", arguments_column_name },
column_name
);
}
return;
}

View File

@ -97,6 +97,7 @@ public:
bool no_makeset;
bool only_consts;
bool create_source_for_in;
bool has_grouping_set_column;
size_t visit_depth;
ScopeStack actions_stack;
@ -120,7 +121,8 @@ public:
bool no_subqueries_,
bool no_makeset_,
bool only_consts_,
bool create_source_for_in_);
bool create_source_for_in_,
bool has_grouping_set_column_);
/// Does result of the calculation already exists in the block.
bool hasColumn(const String & column_name) const;

View File

@ -333,8 +333,10 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
/// For GROUPING SETS with multiple groups we always add virtual __grouping_set column
/// With set number, which is used as an additional key at the stage of merging aggregating data.
if (select_query->group_by_with_grouping_sets && group_asts.size() > 1)
bool process_grouping_sets = select_query->group_by_with_grouping_sets && group_asts.size() > 1;
if (process_grouping_sets)
aggregated_columns.emplace_back("__grouping_set", std::make_shared<DataTypeUInt64>());
need_grouping_set_column = select_query->group_by_with_rollup || select_query->group_by_with_cube || process_grouping_sets;
for (ssize_t i = 0; i < static_cast<ssize_t>(group_asts.size()); ++i)
{
@ -452,8 +454,12 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
}
}
// if (select_query->group_by_with_grouping_sets && group_asts.size() > 1)
// aggregated_columns.emplace_back("__grouping_set_map", std::make_shared<DataTypeFixedString>(aggregation_keys.size() + 1));
if (!select_query->group_by_with_grouping_sets)
{
auto & list = aggregation_keys_indexes_list.emplace_back();
for (size_t i = 0; i < aggregation_keys.size(); ++i)
list.push_back(i);
}
if (group_asts.empty())
{
@ -598,7 +604,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_
no_makeset_for_subqueries,
false /* no_makeset */,
only_consts,
!isRemoteStorage() /* create_source_for_in */);
!isRemoteStorage() /* create_source_for_in */,
need_grouping_set_column);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
@ -620,7 +627,8 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP
true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */,
true /* no_makeset */,
only_consts,
!isRemoteStorage() /* create_source_for_in */);
!isRemoteStorage() /* create_source_for_in */,
need_grouping_set_column);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
@ -643,7 +651,8 @@ void ExpressionAnalyzer::getRootActionsForHaving(
no_makeset_for_subqueries,
false /* no_makeset */,
only_consts,
true /* create_source_for_in */);
true /* create_source_for_in */,
need_grouping_set_column);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}

View File

@ -77,6 +77,8 @@ struct ExpressionAnalyzerData
/// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
TemporaryTablesMapping external_tables;
bool need_grouping_set_column = false;
};

View File

@ -1,6 +1,5 @@
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeInterval.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
@ -757,9 +756,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
res.insert({nullptr, type, aggregate.column_name});
}
// if (analysis_result.use_grouping_set_key)
// res.insert({ nullptr, std::make_shared<DataTypeFixedString>(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" });
return res;
}

View File

@ -89,6 +89,8 @@ public:
bool group_by_with_grouping_sets = false;
bool limit_with_ties = false;
/// Returns true when at least one of the flags group_by_with_rollup,
/// group_by_with_cube or group_by_with_grouping_sets is set on this query.
bool needGroupingSetColumn() const noexcept
{
    return group_by_with_grouping_sets || group_by_with_rollup || group_by_with_cube;
}
ASTPtr & refSelect() { return getExpression(Expression::SELECT); }
ASTPtr & refTables() { return getExpression(Expression::TABLES); }
ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); }

View File

@ -50,10 +50,6 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa
for (auto & col : block)
res.insert(std::move(col));
// auto map_column = ColumnFixedString::create(keys_size + 1);
// map_column->resize(rows);
// res.insert({ColumnPtr(std::move(map_column)), std::make_shared<DataTypeFixedString>(keys_size + 1), "__grouping_set_map"});
return res;
}
@ -243,22 +239,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
index.push_back(dag->getIndex()[header.getPositionByName(col.name)]);
}
// {
// std::string grouping_map;
// grouping_map.reserve(params.keys_size + 1);
// std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end());
// for (auto key : params.keys)
// grouping_map += key_set.contains(key) ? '1' : '0';
// grouping_map += '0';
// auto nested_column = ColumnFixedString::create(params.keys_size + 1);
// nested_column->insertString(grouping_map);
// auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0);
// const auto * grouping_map_node = &dag->addColumn(
// {ColumnPtr(std::move(grouping_map_col)), std::make_shared<DataTypeFixedString>(grouping_map.length()), "__grouping_set_map"});
// grouping_map_node = &dag->materializeNode(*grouping_map_node);
// index.push_back(grouping_map_node);
// }
dag->getIndex().swap(index);
auto expression = std::make_shared<ExpressionActions>(dag, settings.getActionsSettings());
auto transform = std::make_shared<ExpressionTransform>(header, expression);

View File

@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
PreparedSets prepared_sets;
SubqueriesForSets subqueries_for_sets;
ActionsVisitor::Data visitor_data(
context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false);
context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, false);
ActionsVisitor(visitor_data).visit(node);
actions = visitor_data.getActions();
auto expression_actions = std::make_shared<ExpressionActions>(actions);

View File

@ -0,0 +1,20 @@
0 1
1 1
2 1
3 1
4 1
5 1
6 1
7 1
8 1
9 1
0 1 1
1 1 1
2 1 1
3 1 1
4 1 1
5 1 1
6 1 1
7 1 1
8 1 1
9 1 1

View File

@ -0,0 +1,18 @@
-- GROUPING with three arguments under an ordinary GROUP BY (no ROLLUP / CUBE / GROUPING SETS),
-- read through remote() from two addresses.
-- `number` and `number % 2` are GROUP BY keys, `number % 3` is not; the result is compared with 6 (binary 110).
SELECT
number,
grouping(number, number % 2, number % 3) = 6
FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
number,
number % 2
ORDER BY number;
-- GROUPING with a single argument for each of the two GROUP BY keys,
-- written once in lower case and once in upper case.
SELECT
number,
grouping(number),
GROUPING(number % 2)
FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
number,
number % 2
ORDER BY number;