mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Support ordinary GROUP BY
This commit is contained in:
parent
efb30bdf64
commit
6fc7dfea80
@ -45,8 +45,31 @@ public:
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
/// Computes the GROUPING() result when there is a single grouping set
/// (the two-argument form dispatched from executeImpl).
///
/// arguments[0] - constant UInt64 column; its value is read as the number of aggregation keys.
/// arguments[1] - constant Array column; each element is the UInt64 index of one GROUPING() argument.
///
/// Every argument contributes one bit to the result, the first argument ending up in the
/// most significant position. A bit is 1 when the argument's index is less than the number
/// of aggregation keys and 0 otherwise.
///
/// Returns a UInt64 column of input_rows_count rows, each holding that bitmask.
///
/// NOTE(review): the pointers returned by checkAndGetColumnConst are dereferenced without a
/// null check, exactly as before - presumably the caller guarantees both arguments are
/// constant columns of the expected types; confirm.
ColumnPtr executeSingleGroupingSet(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
{
    const auto * grouping_set_map_column = checkAndGetColumnConst<ColumnUInt64>(arguments[0].column.get());
    const auto * argument_keys_column = checkAndGetColumnConst<ColumnArray>(arguments[1].column.get());

    const UInt64 aggregation_keys_number = (*grouping_set_map_column)[0].get<UInt64>();

    /// Both inputs are constant columns, so every row sees the same index array and the same
    /// key count. Build the bitmask once instead of copying the array and recomputing it per row.
    const Array indexes = (*argument_keys_column)[0].get<Array>();

    UInt64 value = 0;
    for (const auto & index : indexes)
        value = (value << 1) + (index.get<UInt64>() < aggregation_keys_number ? 1 : 0);

    /// Fill the whole result column with the precomputed value.
    return ColumnUInt64::create(input_rows_count, value);
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
if (arguments.size() == 2)
|
||||
return executeSingleGroupingSet(arguments, input_rows_count);
|
||||
|
||||
auto grouping_set_column = checkAndGetColumn<ColumnUInt64>(arguments[0].column.get());
|
||||
auto grouping_set_map_column = checkAndGetColumnConst<ColumnArray>(arguments[1].column.get());
|
||||
auto argument_keys_column = checkAndGetColumnConst<ColumnArray>(arguments[2].column.get());
|
||||
|
@ -477,7 +477,8 @@ ActionsMatcher::Data::Data(
|
||||
bool no_subqueries_,
|
||||
bool no_makeset_,
|
||||
bool only_consts_,
|
||||
bool create_source_for_in_)
|
||||
bool create_source_for_in_,
|
||||
bool has_grouping_set_column_)
|
||||
: WithContext(context_)
|
||||
, set_size_limit(set_size_limit_)
|
||||
, subquery_depth(subquery_depth_)
|
||||
@ -490,6 +491,7 @@ ActionsMatcher::Data::Data(
|
||||
, no_makeset(no_makeset_)
|
||||
, only_consts(only_consts_)
|
||||
, create_source_for_in(create_source_for_in_)
|
||||
, has_grouping_set_column(has_grouping_set_column_)
|
||||
, visit_depth(0)
|
||||
, actions_stack(std::move(actions_dag), context_)
|
||||
, next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1)
|
||||
@ -842,19 +844,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = "__grouping_set_map";
|
||||
size_t map_size = data.aggregation_keys.size() + 1;
|
||||
column.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeFixedString>(map_size));
|
||||
Array maps_per_set;
|
||||
for (auto & grouping_set : data.grouping_set_keys)
|
||||
if (data.has_grouping_set_column)
|
||||
{
|
||||
std::string key_map(map_size, '0');
|
||||
for (auto index : grouping_set)
|
||||
key_map[index] = '1';
|
||||
maps_per_set.push_back(key_map);
|
||||
size_t map_size = data.aggregation_keys.size() + 1;
|
||||
column.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeFixedString>(map_size));
|
||||
Array maps_per_set;
|
||||
for (auto & grouping_set : data.grouping_set_keys)
|
||||
{
|
||||
std::string key_map(map_size, '0');
|
||||
for (auto index : grouping_set)
|
||||
key_map[index] = '1';
|
||||
maps_per_set.push_back(key_map);
|
||||
}
|
||||
auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size));
|
||||
grouping_set_map_column->insert(maps_per_set);
|
||||
column.column = ColumnConst::create(std::move(grouping_set_map_column), 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
column.type = std::make_shared<DataTypeUInt64>();
|
||||
auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size());
|
||||
column.column = ColumnConst::create(std::move(grouping_set_map_column), 1);
|
||||
}
|
||||
auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size));
|
||||
grouping_set_map_column->insert(maps_per_set);
|
||||
column.column = ColumnConst::create(std::move(grouping_set_map_column), 1);
|
||||
|
||||
data.addColumn(column);
|
||||
}
|
||||
@ -875,11 +886,22 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
data.addColumn(column);
|
||||
}
|
||||
|
||||
data.addFunction(
|
||||
FunctionFactory::instance().get("grouping", data.getContext()),
|
||||
{ "__grouping_set", "__grouping_set_map", arguments_column_name },
|
||||
column_name
|
||||
);
|
||||
if (data.has_grouping_set_column)
|
||||
{
|
||||
data.addFunction(
|
||||
FunctionFactory::instance().get("grouping", data.getContext()),
|
||||
{ "__grouping_set", "__grouping_set_map", arguments_column_name },
|
||||
column_name
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
data.addFunction(
|
||||
FunctionFactory::instance().get("grouping", data.getContext()),
|
||||
{ "__grouping_set_map", arguments_column_name },
|
||||
column_name
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -97,6 +97,7 @@ public:
|
||||
bool no_makeset;
|
||||
bool only_consts;
|
||||
bool create_source_for_in;
|
||||
bool has_grouping_set_column;
|
||||
size_t visit_depth;
|
||||
ScopeStack actions_stack;
|
||||
|
||||
@ -120,7 +121,8 @@ public:
|
||||
bool no_subqueries_,
|
||||
bool no_makeset_,
|
||||
bool only_consts_,
|
||||
bool create_source_for_in_);
|
||||
bool create_source_for_in_,
|
||||
bool has_grouping_set_column_);
|
||||
|
||||
/// Does the result of the calculation already exist in the block?
|
||||
bool hasColumn(const String & column_name) const;
|
||||
|
@ -333,8 +333,10 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
|
||||
/// For GROUPING SETS with multiple groups we always add virtual __grouping_set column
|
||||
/// With set number, which is used as an additional key at the stage of merging aggregating data.
|
||||
if (select_query->group_by_with_grouping_sets && group_asts.size() > 1)
|
||||
bool process_grouping_sets = select_query->group_by_with_grouping_sets && group_asts.size() > 1;
|
||||
if (process_grouping_sets)
|
||||
aggregated_columns.emplace_back("__grouping_set", std::make_shared<DataTypeUInt64>());
|
||||
need_grouping_set_column = select_query->group_by_with_rollup || select_query->group_by_with_cube || process_grouping_sets;
|
||||
|
||||
for (ssize_t i = 0; i < static_cast<ssize_t>(group_asts.size()); ++i)
|
||||
{
|
||||
@ -452,8 +454,12 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
}
|
||||
}
|
||||
|
||||
// if (select_query->group_by_with_grouping_sets && group_asts.size() > 1)
|
||||
// aggregated_columns.emplace_back("__grouping_set_map", std::make_shared<DataTypeFixedString>(aggregation_keys.size() + 1));
|
||||
if (!select_query->group_by_with_grouping_sets)
|
||||
{
|
||||
auto & list = aggregation_keys_indexes_list.emplace_back();
|
||||
for (size_t i = 0; i < aggregation_keys.size(); ++i)
|
||||
list.push_back(i);
|
||||
}
|
||||
|
||||
if (group_asts.empty())
|
||||
{
|
||||
@ -598,7 +604,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_
|
||||
no_makeset_for_subqueries,
|
||||
false /* no_makeset */,
|
||||
only_consts,
|
||||
!isRemoteStorage() /* create_source_for_in */);
|
||||
!isRemoteStorage() /* create_source_for_in */,
|
||||
need_grouping_set_column);
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
@ -620,7 +627,8 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP
|
||||
true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */,
|
||||
true /* no_makeset */,
|
||||
only_consts,
|
||||
!isRemoteStorage() /* create_source_for_in */);
|
||||
!isRemoteStorage() /* create_source_for_in */,
|
||||
need_grouping_set_column);
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
@ -643,7 +651,8 @@ void ExpressionAnalyzer::getRootActionsForHaving(
|
||||
no_makeset_for_subqueries,
|
||||
false /* no_makeset */,
|
||||
only_consts,
|
||||
true /* create_source_for_in */);
|
||||
true /* create_source_for_in */,
|
||||
need_grouping_set_column);
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
|
@ -77,6 +77,8 @@ struct ExpressionAnalyzerData
|
||||
|
||||
/// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
|
||||
TemporaryTablesMapping external_tables;
|
||||
|
||||
bool need_grouping_set_column = false;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||
#include <DataTypes/DataTypeInterval.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
@ -757,9 +756,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
|
||||
res.insert({nullptr, type, aggregate.column_name});
|
||||
}
|
||||
|
||||
// if (analysis_result.use_grouping_set_key)
|
||||
// res.insert({ nullptr, std::make_shared<DataTypeFixedString>(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" });
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -89,6 +89,8 @@ public:
|
||||
bool group_by_with_grouping_sets = false;
|
||||
bool limit_with_ties = false;
|
||||
|
||||
/// Returns true if any of group_by_with_cube, group_by_with_rollup or group_by_with_grouping_sets is set.
/// NOTE(review): ExpressionAnalyzer::analyzeAggregation derives its own need_grouping_set_column flag and
/// additionally requires group_asts.size() > 1 in the grouping-sets case - confirm which condition is intended.
bool needGroupingSetColumn() const noexcept { return group_by_with_cube || group_by_with_rollup || group_by_with_grouping_sets; }
|
||||
|
||||
ASTPtr & refSelect() { return getExpression(Expression::SELECT); }
|
||||
ASTPtr & refTables() { return getExpression(Expression::TABLES); }
|
||||
ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); }
|
||||
|
@ -50,10 +50,6 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa
|
||||
for (auto & col : block)
|
||||
res.insert(std::move(col));
|
||||
|
||||
// auto map_column = ColumnFixedString::create(keys_size + 1);
|
||||
// map_column->resize(rows);
|
||||
// res.insert({ColumnPtr(std::move(map_column)), std::make_shared<DataTypeFixedString>(keys_size + 1), "__grouping_set_map"});
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -243,22 +239,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
|
||||
index.push_back(dag->getIndex()[header.getPositionByName(col.name)]);
|
||||
}
|
||||
|
||||
// {
|
||||
// std::string grouping_map;
|
||||
// grouping_map.reserve(params.keys_size + 1);
|
||||
// std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end());
|
||||
// for (auto key : params.keys)
|
||||
// grouping_map += key_set.contains(key) ? '1' : '0';
|
||||
// grouping_map += '0';
|
||||
// auto nested_column = ColumnFixedString::create(params.keys_size + 1);
|
||||
// nested_column->insertString(grouping_map);
|
||||
// auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0);
|
||||
// const auto * grouping_map_node = &dag->addColumn(
|
||||
// {ColumnPtr(std::move(grouping_map_col)), std::make_shared<DataTypeFixedString>(grouping_map.length()), "__grouping_set_map"});
|
||||
// grouping_map_node = &dag->materializeNode(*grouping_map_node);
|
||||
// index.push_back(grouping_map_node);
|
||||
// }
|
||||
|
||||
dag->getIndex().swap(index);
|
||||
auto expression = std::make_shared<ExpressionActions>(dag, settings.getActionsSettings());
|
||||
auto transform = std::make_shared<ExpressionTransform>(header, expression);
|
||||
|
@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
|
||||
PreparedSets prepared_sets;
|
||||
SubqueriesForSets subqueries_for_sets;
|
||||
ActionsVisitor::Data visitor_data(
|
||||
context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false);
|
||||
context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, false);
|
||||
ActionsVisitor(visitor_data).visit(node);
|
||||
actions = visitor_data.getActions();
|
||||
auto expression_actions = std::make_shared<ExpressionActions>(actions);
|
||||
|
@ -0,0 +1,20 @@
|
||||
0 1
|
||||
1 1
|
||||
2 1
|
||||
3 1
|
||||
4 1
|
||||
5 1
|
||||
6 1
|
||||
7 1
|
||||
8 1
|
||||
9 1
|
||||
0 1 1
|
||||
1 1 1
|
||||
2 1 1
|
||||
3 1 1
|
||||
4 1 1
|
||||
5 1 1
|
||||
6 1 1
|
||||
7 1 1
|
||||
8 1 1
|
||||
9 1 1
|
@ -0,0 +1,18 @@
|
||||
-- grouping() with an ordinary GROUP BY, read through remote() over two addresses.
-- The third argument (number % 3) is not listed in GROUP BY; the result is compared with 6 (binary 110).
SELECT
|
||||
number,
|
||||
grouping(number, number % 2, number % 3) = 6
|
||||
FROM remote('127.0.0.{2,3}', numbers(10))
|
||||
GROUP BY
|
||||
number,
|
||||
number % 2
|
||||
ORDER BY number;
|
||||
|
||||
-- grouping() applied to each GROUP BY key separately, written once in lower case and once in upper case.
SELECT
|
||||
number,
|
||||
grouping(number),
|
||||
GROUPING(number % 2)
|
||||
FROM remote('127.0.0.{2,3}', numbers(10))
|
||||
GROUP BY
|
||||
number,
|
||||
number % 2
|
||||
ORDER BY number;
|
Loading…
Reference in New Issue
Block a user