avoid extracting the deepest columns

This commit is contained in:
taofengliu 2022-10-20 18:03:00 +08:00
parent 05def101db
commit 4d5bf0e3b2
5 changed files with 118 additions and 46 deletions

View File

@ -245,13 +245,13 @@ You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../.
## GROUP BY ALL
`GROUP BY ALL` is equivalent to listing all the SELECT-ed columns that are not expressions of the aggregate functions.
`GROUP BY ALL` is equivalent to listing all the SELECT-ed expressions that are aggregate functions.
For example:
``` sql
SELECT
a,
a * 2,
b,
count(c),
FROM t
@ -266,7 +266,29 @@ SELECT
b,
count(c),
FROM t
GROUP BY a, b
GROUP BY a * 2, b
```
For a special case that if there is a function having both aggregate functions and other fields as its arguments, the `GROUP BY` key will contain the max non-aggregate fields we can extract from it.
For example:
``` sql
SELECT
substring(a, 4, 2)
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY ALL
```
is the same as
``` sql
SELECT
substring(a, 4, 2)
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
```
## Examples

View File

@ -79,13 +79,13 @@ sidebar_label: GROUP BY
## GROUP BY ALL {#group-by-all}
`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的进行 `GROUP BY`
`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的字段进行 `GROUP BY`
例如
``` sql
SELECT
a,
a * 2,
b,
count(c),
FROM t
@ -96,11 +96,33 @@ GROUP BY ALL
``` sql
SELECT
a,
a * 2,
b,
count(c),
FROM t
GROUP BY a, b
GROUP BY a * 2, b
```
对于一种特殊情况,如果一个 function 的参数中同时有聚合函数和其他字段,会对参数中能提取的最大非聚合字段进行`GROUP BY`
例如:
``` sql
SELECT
substring(a, 4, 2)
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY ALL
```
效果等同于
``` sql
SELECT
substring(a, 4, 2)
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
```
## 例子 {#examples}

View File

@ -786,27 +786,52 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
}
void recursivelyCollectIdentifiersFromArgumentsOfOrdinaryFunctions(const IAST & from, ASTExpressionList & into)
std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(const ASTFunction & function, ASTExpressionList & into)
{
checkStackSize();
const auto * function = from.as<ASTFunction>();
if (!function)
return;
if (AggregateUtils::isAggregateFunction(function))
return {true, 0};
if (AggregateUtils::isAggregateFunction(*function))
return;
UInt64 pushed_children = 0;
bool has_aggregate = false;
for (const auto & child : function->arguments->children)
for (const auto & child : function.arguments->children)
{
if (child->as<ASTIdentifier>())
{
into.children.push_back(child);
else
recursivelyCollectIdentifiersFromArgumentsOfOrdinaryFunctions(*child, into);
pushed_children++;
}
else if (child->as<ASTFunction>())
{
const auto * child_func = child->as<ASTFunction>();
auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(*child_func, into);
/// The current function is not aggregate function and there is no aggregate function in its arguments,
/// so use the current function to replace its children
if (!child_has_aggregate)
{
for (UInt64 i = 0; i < child_pushed_children; i++)
{
into.children.pop_back();
}
into.children.push_back(child);
pushed_children = pushed_children - child_pushed_children + 1;
}
else
{
has_aggregate = true;
pushed_children += child_pushed_children;
}
}
}
return {has_aggregate, pushed_children};
}
/// Expand GROUP BY ALL by listing all the SELECT-ed columns that are not expressions of the aggregate functions
/// Expand GROUP BY ALL
void expandGroupByAll(ASTSelectQuery * select_query)
{
auto group_expression_list = std::make_shared<ASTExpressionList>();
@ -815,14 +840,26 @@ void expandGroupByAll(ASTSelectQuery * select_query)
{
if (expr->as<ASTIdentifier>())
group_expression_list->children.push_back(expr);
else
recursivelyCollectIdentifiersFromArgumentsOfOrdinaryFunctions(*expr, *group_expression_list);
else if (expr->as<ASTFunction>())
{
auto [has_aggregate, pushed_children] = recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(*expr->as<ASTFunction>(), *group_expression_list);
/// The current function is not aggregate function and there is no aggregate function in its arguments,
/// so use the current function to replace its children
if (!has_aggregate)
{
for (UInt64 i = 0; i < pushed_children; i++)
{
group_expression_list->children.pop_back();
}
group_expression_list->children.push_back(expr);
}
}
}
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
}
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.

View File

@ -1,17 +1,10 @@
3 2
2 2
1 2
1 3 2
1 2 2
1 1 2
7 2
5 2
3 2
4 2
3 2
5 1
6 1
3 5 1
1 3 2
3 6 1
2 4 2
abc4 1
abc1 1
abc3 1
abc2 1
abc 4
abc ab
abc a
abc a
abc a
abc a

View File

@ -2,16 +2,14 @@ DROP TABLE IF EXISTS group_by_all;
CREATE TABLE group_by_all
(
a int,
b int,
ALL int
a String,
b int
)
engine = Memory;
INSERT INTO group_by_all VALUES (1, 2, 3), (1, 2, 3), (2, 4, 4), (2, 5, 4), (3, 5, 5), (3, 5, 6);
insert into group_by_all values ('abc1', 0), ('abc2', 0), ('abc3', 0), ('abc4', 0);
SELECT a, COUNT(b) FROM group_by_all GROUP BY ALL;
SELECT 1, a, COUNT(b) FROM group_by_all GROUP BY ALL;
SELECT (a * 2) + 1, COUNT(b) FROM group_by_all GROUP BY ALL;
SELECT `ALL`, COUNT(b) FROM group_by_all GROUP BY `ALL`;
SELECT a, `ALL`, COUNT(b) FROM group_by_all GROUP BY ALL;
select a, count(b) from group_by_all group by all;
select substring(a, 1, 3), count(b) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all;