mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
avoid extracting the deepest columns
This commit is contained in:
parent
05def101db
commit
4d5bf0e3b2
@ -245,13 +245,13 @@ You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../.
|
||||
|
||||
## GROUP BY ALL
|
||||
|
||||
`GROUP BY ALL` is equivalent to listing all the SELECT-ed columns that are not expressions of the aggregate functions.
|
||||
`GROUP BY ALL` is equivalent to listing all the SELECT-ed expressions that are aggregate functions.
|
||||
|
||||
For example:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
a,
|
||||
a * 2,
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
@ -266,7 +266,29 @@ SELECT
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
GROUP BY a, b
|
||||
GROUP BY a * 2, b
|
||||
```
|
||||
|
||||
For a special case that if there is a function having both aggregate functions and other fields as its arguments, the `GROUP BY` key will contain the max non-aggregate fields we can extract from it.
|
||||
|
||||
For example:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2)
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY ALL
|
||||
```
|
||||
|
||||
is the same as
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2)
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
@ -79,13 +79,13 @@ sidebar_label: GROUP BY
|
||||
|
||||
## GROUP BY ALL {#group-by-all}
|
||||
|
||||
`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的列进行 `GROUP BY`
|
||||
`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的字段进行 `GROUP BY`
|
||||
|
||||
例如
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
a,
|
||||
a * 2,
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
@ -96,11 +96,33 @@ GROUP BY ALL
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
a,
|
||||
a * 2,
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
GROUP BY a, b
|
||||
GROUP BY a * 2, b
|
||||
```
|
||||
|
||||
对于一种特殊情况,如果一个 function 的参数中同时有聚合函数和其他字段,会对参数中能提取的最大非聚合字段进行`GROUP BY`
|
||||
|
||||
例如:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2)
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY ALL
|
||||
```
|
||||
|
||||
效果等同于
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2)
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
|
||||
```
|
||||
|
||||
## 例子 {#examples}
|
||||
|
@ -786,27 +786,52 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
|
||||
}
|
||||
|
||||
|
||||
void recursivelyCollectIdentifiersFromArgumentsOfOrdinaryFunctions(const IAST & from, ASTExpressionList & into)
|
||||
std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(const ASTFunction & function, ASTExpressionList & into)
|
||||
{
|
||||
checkStackSize();
|
||||
|
||||
const auto * function = from.as<ASTFunction>();
|
||||
if (!function)
|
||||
return;
|
||||
if (AggregateUtils::isAggregateFunction(function))
|
||||
return {true, 0};
|
||||
|
||||
if (AggregateUtils::isAggregateFunction(*function))
|
||||
return;
|
||||
UInt64 pushed_children = 0;
|
||||
bool has_aggregate = false;
|
||||
|
||||
for (const auto & child : function->arguments->children)
|
||||
for (const auto & child : function.arguments->children)
|
||||
{
|
||||
if (child->as<ASTIdentifier>())
|
||||
{
|
||||
into.children.push_back(child);
|
||||
else
|
||||
recursivelyCollectIdentifiersFromArgumentsOfOrdinaryFunctions(*child, into);
|
||||
pushed_children++;
|
||||
}
|
||||
else if (child->as<ASTFunction>())
|
||||
{
|
||||
const auto * child_func = child->as<ASTFunction>();
|
||||
|
||||
auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(*child_func, into);
|
||||
|
||||
/// The current function is not aggregate function and there is no aggregate function in its arguments,
|
||||
/// so use the current function to replace its children
|
||||
if (!child_has_aggregate)
|
||||
{
|
||||
for (UInt64 i = 0; i < child_pushed_children; i++)
|
||||
{
|
||||
into.children.pop_back();
|
||||
}
|
||||
into.children.push_back(child);
|
||||
pushed_children = pushed_children - child_pushed_children + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
has_aggregate = true;
|
||||
pushed_children += child_pushed_children;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {has_aggregate, pushed_children};
|
||||
}
|
||||
|
||||
/// Expand GROUP BY ALL by listing all the SELECT-ed columns that are not expressions of the aggregate functions
|
||||
/// Expand GROUP BY ALL
|
||||
void expandGroupByAll(ASTSelectQuery * select_query)
|
||||
{
|
||||
auto group_expression_list = std::make_shared<ASTExpressionList>();
|
||||
@ -815,14 +840,26 @@ void expandGroupByAll(ASTSelectQuery * select_query)
|
||||
{
|
||||
if (expr->as<ASTIdentifier>())
|
||||
group_expression_list->children.push_back(expr);
|
||||
else
|
||||
recursivelyCollectIdentifiersFromArgumentsOfOrdinaryFunctions(*expr, *group_expression_list);
|
||||
else if (expr->as<ASTFunction>())
|
||||
{
|
||||
auto [has_aggregate, pushed_children] = recursivelyCollectMaxOrdinaryExpressionsFromFunctionArgs(*expr->as<ASTFunction>(), *group_expression_list);
|
||||
|
||||
/// The current function is not aggregate function and there is no aggregate function in its arguments,
|
||||
/// so use the current function to replace its children
|
||||
if (!has_aggregate)
|
||||
{
|
||||
for (UInt64 i = 0; i < pushed_children; i++)
|
||||
{
|
||||
group_expression_list->children.pop_back();
|
||||
}
|
||||
group_expression_list->children.push_back(expr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
|
||||
}
|
||||
|
||||
|
||||
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
|
||||
{
|
||||
/// There can not be aggregate functions inside the WHERE and PREWHERE.
|
||||
|
@ -1,17 +1,10 @@
|
||||
3 2
|
||||
2 2
|
||||
1 2
|
||||
1 3 2
|
||||
1 2 2
|
||||
1 1 2
|
||||
7 2
|
||||
5 2
|
||||
3 2
|
||||
4 2
|
||||
3 2
|
||||
5 1
|
||||
6 1
|
||||
3 5 1
|
||||
1 3 2
|
||||
3 6 1
|
||||
2 4 2
|
||||
abc4 1
|
||||
abc1 1
|
||||
abc3 1
|
||||
abc2 1
|
||||
abc 4
|
||||
abc ab
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
|
@ -2,16 +2,14 @@ DROP TABLE IF EXISTS group_by_all;
|
||||
|
||||
CREATE TABLE group_by_all
|
||||
(
|
||||
a int,
|
||||
b int,
|
||||
ALL int
|
||||
a String,
|
||||
b int
|
||||
)
|
||||
engine = Memory;
|
||||
|
||||
INSERT INTO group_by_all VALUES (1, 2, 3), (1, 2, 3), (2, 4, 4), (2, 5, 4), (3, 5, 5), (3, 5, 6);
|
||||
insert into group_by_all values ('abc1', 0), ('abc2', 0), ('abc3', 0), ('abc4', 0);
|
||||
|
||||
SELECT a, COUNT(b) FROM group_by_all GROUP BY ALL;
|
||||
SELECT 1, a, COUNT(b) FROM group_by_all GROUP BY ALL;
|
||||
SELECT (a * 2) + 1, COUNT(b) FROM group_by_all GROUP BY ALL;
|
||||
SELECT `ALL`, COUNT(b) FROM group_by_all GROUP BY `ALL`;
|
||||
SELECT a, `ALL`, COUNT(b) FROM group_by_all GROUP BY ALL;
|
||||
select a, count(b) from group_by_all group by all;
|
||||
select substring(a, 1, 3), count(b) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all;
|
||||
|
Loading…
Reference in New Issue
Block a user