Merge pull request #45558 from ClickHouse/vdimir/remove-ast-fuse

This commit is contained in:
Vladimir C 2023-01-25 11:25:36 +01:00 committed by GitHub
commit cfefaebe8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 5 additions and 169 deletions

View File

@ -528,8 +528,7 @@ class IColumn;
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, optimize_syntax_fuse_functions, false, "Not ready for production, do not use. Allow apply syntax optimisation: fuse aggregate functions", 0) \
M(Bool, optimize_fuse_sum_count_avg, false, "Replace calls of functions `sum`, `avg`, `count` with identical arguments into one `sumCount`", 0) \
M(Bool, optimize_syntax_fuse_functions, false, "Allow apply fuse aggregating function. Available only with `allow_experimental_analyzer`", 0) \
M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \
M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
@ -727,6 +726,8 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \
MAKE_OBSOLETE(M, Seconds, temporary_live_view_timeout, 1) \
MAKE_OBSOLETE(M, Milliseconds, async_insert_cleanup_timeout_ms, 1000) \
MAKE_OBSOLETE(M, Bool, optimize_fuse_sum_count_avg, 0) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -208,73 +208,8 @@ struct CustomizeAggregateFunctionsMoveSuffixData
}
};
struct FuseSumCountAggregates
{
std::vector<ASTFunction *> sums {};
std::vector<ASTFunction *> counts {};
std::vector<ASTFunction *> avgs {};
void addFuncNode(ASTFunction * func)
{
if (func->name == "sum")
sums.push_back(func);
else if (func->name == "count")
counts.push_back(func);
else
{
assert(func->name == "avg");
avgs.push_back(func);
}
}
bool canBeFused() const
{
// Need at least two different kinds of functions to fuse.
if (sums.empty() && counts.empty())
return false;
if (sums.empty() && avgs.empty())
return false;
if (counts.empty() && avgs.empty())
return false;
return true;
}
};
struct FuseSumCountAggregatesVisitorData
{
using TypeToVisit = ASTFunction;
std::unordered_map<String, FuseSumCountAggregates> fuse_map;
void visit(ASTFunction & func, ASTPtr &)
{
if (func.name == "sum" || func.name == "avg" || func.name == "count")
{
if (func.arguments->children.empty())
return;
// Probably we can extend it to match count() for non-nullable argument
// to sum/avg with any other argument. Now we require strict match.
const auto argument = func.arguments->children.at(0)->getColumnName();
auto it = fuse_map.find(argument);
if (it != fuse_map.end())
{
it->second.addFuncNode(&func);
}
else
{
FuseSumCountAggregates funcs{};
funcs.addFuncNode(&func);
fuse_map[argument] = funcs;
}
}
}
};
using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsSuffixData>, true>;
using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsMoveSuffixData>, true>;
using FuseSumCountAggregatesVisitor = InDepthNodeVisitor<OneTypeMatcher<FuseSumCountAggregatesVisitorData>, true>;
struct ExistsExpressionData
{
@ -376,52 +311,6 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query
throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, "Empty list of columns in SELECT query");
}
// Replaces one avg/sum/count function with an appropriate expression with
// sumCount().
void replaceWithSumCount(String column_name, ASTFunction & func)
{
auto func_base = makeASTFunction("sumCount", std::make_shared<ASTIdentifier>(column_name));
auto exp_list = std::make_shared<ASTExpressionList>();
if (func.name == "sum" || func.name == "count")
{
/// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2
UInt8 idx = (func.name == "sum" ? 1 : 2);
func.name = "tupleElement";
exp_list->children.push_back(func_base);
exp_list->children.push_back(std::make_shared<ASTLiteral>(idx));
}
else
{
/// Rewrite "avg" to sumCount().1 / sumCount().2
auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(1)));
auto new_arg2 = makeASTFunction("CAST",
makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(static_cast<UInt8>(2))),
std::make_shared<ASTLiteral>("Float64"));
func.name = "divide";
exp_list->children.push_back(new_arg1);
exp_list->children.push_back(new_arg2);
}
func.arguments = exp_list;
func.children.push_back(func.arguments);
}
void fuseSumCountAggregates(std::unordered_map<String, FuseSumCountAggregates> & fuse_map)
{
for (auto & it : fuse_map)
{
if (it.second.canBeFused())
{
for (auto & func: it.second.sums)
replaceWithSumCount(it.first, *func);
for (auto & func: it.second.avgs)
replaceWithSumCount(it.first, *func);
for (auto & func: it.second.counts)
replaceWithSumCount(it.first, *func);
}
}
}
bool hasArrayJoin(const ASTPtr & ast)
{
if (const ASTFunction * function = ast->as<ASTFunction>())
@ -1544,17 +1433,6 @@ void TreeRewriter::normalize(
CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query);
}
// Try to fuse sum/avg/count with identical arguments to one sumCount call,
// if we have at least two different functions. E.g. we will replace sum(x)
// and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will
// be calculated only once because of CSE.
if (settings.optimize_fuse_sum_count_avg && settings.optimize_syntax_fuse_functions)
{
FuseSumCountAggregatesVisitor::Data data;
FuseSumCountAggregatesVisitor(data).visit(query);
fuseSumCountAggregates(data.fuse_map);
}
/// Rewrite all aggregate functions to add -OrNull suffix to them
if (settings.aggregate_functions_null_for_empty)
{

View File

@ -1,12 +0,0 @@
210 230 20
SELECT
sum(a),
sumCount(b).1,
sumCount(b).2
FROM fuse_tbl
---------NOT trigger fuse--------
210 11.5
SELECT
sum(a),
avg(b)
FROM fuse_tbl

View File

@ -1,14 +0,0 @@
DROP TABLE IF EXISTS fuse_tbl;
CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log;
INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20);
SET optimize_syntax_fuse_functions = 1;
SET optimize_fuse_sum_count_avg = 1;
SELECT sum(a), sum(b), count(b) from fuse_tbl;
EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl;
SELECT '---------NOT trigger fuse--------';
SELECT sum(a), avg(b) from fuse_tbl;
EXPLAIN SYNTAX SELECT sum(a), avg(b) from fuse_tbl;
DROP TABLE fuse_tbl;

View File

@ -1,2 +0,0 @@
0 0 nan
0 0 nan

View File

@ -1,5 +0,0 @@
SELECT sum(x), count(x), avg(x) FROM (SELECT number :: Decimal32(0) AS x FROM numbers(0))
SETTINGS optimize_syntax_fuse_functions = 0, optimize_fuse_sum_count_avg = 0;
SELECT sum(x), count(x), avg(x) FROM (SELECT number :: Decimal32(0) AS x FROM numbers(0))
SETTINGS optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1;

View File

@ -1,5 +1,5 @@
SET allow_experimental_analyzer = 1;
SET optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1;
SET optimize_syntax_fuse_functions = 1;
DROP TABLE IF EXISTS fuse_tbl;

View File

@ -1,5 +1,5 @@
SET allow_experimental_analyzer = 1;
SET optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1;
SET optimize_syntax_fuse_functions = 1;
DROP TABLE IF EXISTS fuse_tbl;

View File

@ -1,10 +0,0 @@
DROP TABLE IF EXISTS fuse_tbl__fuzz_35;
CREATE TABLE fuse_tbl__fuzz_35 (`a` UInt8, `b` Nullable(Int16)) ENGINE = Log;
INSERT INTO fuse_tbl__fuzz_35 SELECT number, number + 1 FROM numbers(1000);
set allow_experimental_analyzer = 0, optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1;
SELECT quantile(0.5)(b), quantile(0.9)(b) FROM (SELECT x + 2147483648 AS b FROM (SELECT quantile(0.5)(b) AS x FROM fuse_tbl__fuzz_35) GROUP BY x) FORMAT Null;
DROP TABLE IF EXISTS fuse_tbl__fuzz_35;