mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Revert "fuse multi setting options into one when Optimize"
This reverts commit bbd7799375.
This commit is contained in:
parent
bbd7799375
commit
a3db56f056
@ -588,118 +588,10 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me
|
||||
RewriteFunctionToSubcolumnVisitor(data).visit(query);
|
||||
}
|
||||
|
||||
struct FuseSumCountAggregates
|
||||
{
|
||||
std::vector<ASTFunction *> sums {};
|
||||
std::vector<ASTFunction *> counts {};
|
||||
std::vector<ASTFunction *> avgs {};
|
||||
|
||||
void addFuncNode(ASTFunction * func)
|
||||
{
|
||||
if (func->name == "sum")
|
||||
sums.push_back(func);
|
||||
else if (func->name == "count")
|
||||
counts.push_back(func);
|
||||
else
|
||||
{
|
||||
assert(func->name == "avg");
|
||||
avgs.push_back(func);
|
||||
}
|
||||
}
|
||||
|
||||
bool canBeFused() const
|
||||
{
|
||||
// Need at least two different kinds of functions to fuse.
|
||||
if (sums.empty() && counts.empty())
|
||||
return false;
|
||||
if (sums.empty() && avgs.empty())
|
||||
return false;
|
||||
if (counts.empty() && avgs.empty())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct FuseSumCountAggregatesVisitorData
|
||||
{
|
||||
using TypeToVisit = ASTFunction;
|
||||
|
||||
std::unordered_map<String, FuseSumCountAggregates> fuse_map;
|
||||
|
||||
void visit(ASTFunction & func, ASTPtr &)
|
||||
{
|
||||
if (func.name == "sum" || func.name == "avg" || func.name == "count")
|
||||
{
|
||||
if (func.arguments->children.empty())
|
||||
return;
|
||||
|
||||
// Probably we can extend it to match count() for non-nullable argument
|
||||
// to sum/avg with any other argument. Now we require strict match.
|
||||
const auto argument = func.arguments->children.at(0)->getColumnName();
|
||||
auto it = fuse_map.find(argument);
|
||||
if (it != fuse_map.end())
|
||||
{
|
||||
it->second.addFuncNode(&func);
|
||||
}
|
||||
else
|
||||
{
|
||||
FuseSumCountAggregates funcs{};
|
||||
funcs.addFuncNode(&func);
|
||||
fuse_map[argument] = funcs;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Visitor type that feeds ASTFunction nodes to FuseSumCountAggregatesVisitorData::visit.
/// NOTE(review): the trailing `true` is presumably the traversal-order flag of InDepthNodeVisitor — confirm.
using FuseSumCountAggregatesVisitor = InDepthNodeVisitor<OneTypeMatcher<FuseSumCountAggregatesVisitorData>, true>;
|
||||
|
||||
// Replaces one avg/sum/count function with an appropriate expression with
|
||||
// sumCount().
|
||||
void replaceWithSumCount(String column_name, ASTFunction & func)
|
||||
{
|
||||
auto func_base = makeASTFunction("sumCount", std::make_shared<ASTIdentifier>(column_name));
|
||||
auto exp_list = std::make_shared<ASTExpressionList>();
|
||||
if (func.name == "sum" || func.name == "count")
|
||||
{
|
||||
/// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2
|
||||
UInt8 idx = (func.name == "sum" ? 1 : 2);
|
||||
func.name = "tupleElement";
|
||||
exp_list->children.push_back(func_base);
|
||||
exp_list->children.push_back(std::make_shared<ASTLiteral>(idx));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Rewrite "avg" to sumCount().1 / sumCount().2
|
||||
auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(1)));
|
||||
auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(2)));
|
||||
func.name = "divide";
|
||||
exp_list->children.push_back(new_arg1);
|
||||
exp_list->children.push_back(new_arg2);
|
||||
}
|
||||
func.arguments = exp_list;
|
||||
func.children.push_back(func.arguments);
|
||||
}
|
||||
|
||||
void fuseSumCountAggregates(std::unordered_map<String, FuseSumCountAggregates> & fuse_map)
|
||||
{
|
||||
for (auto & it : fuse_map)
|
||||
{
|
||||
if (it.second.canBeFused())
|
||||
{
|
||||
for (auto & func: it.second.sums)
|
||||
replaceWithSumCount(it.first, *func);
|
||||
for (auto & func: it.second.avgs)
|
||||
replaceWithSumCount(it.first, *func);
|
||||
for (auto & func: it.second.counts)
|
||||
replaceWithSumCount(it.first, *func);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Rewrites multiple quantile()() functions with the same arguments into quantiles()()[].
|
||||
/// e.g. SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.95)(x) FROM ...
|
||||
/// is rewritten to: SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ...
|
||||
void fuseQuantileCandidate(std::unordered_map<String, GatherFunctionQuantileData::FuseQuantileAggregatesData> & fuse_quantile)
|
||||
void fuseCandidate(std::unordered_map<String, GatherFunctionQuantileData::FuseQuantileAggregatesData> & fuse_quantile)
|
||||
{
|
||||
for (const auto & candidate : fuse_quantile)
|
||||
{
|
||||
@ -735,21 +627,13 @@ void fuseQuantileCandidate(std::unordered_map<String, GatherFunctionQuantileData
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs GatherFunctionQuantileVisitor over `query` and passes the collected
/// `data.fuse_quantile` map on to the fusing routine.
/// NOTE(review): this view overlays both sides of the diff; `fuseQuantileCandidate`
/// and `fuseCandidate` appear to be two names of the same callee, so presumably
/// only one of the two calls below exists in any single revision — confirm.
void optimizeFuseQuantileFunctions(ASTPtr & query)
|
||||
{
|
||||
GatherFunctionQuantileVisitor::Data data{};
|
||||
GatherFunctionQuantileVisitor(data).visit(query);
|
||||
fuseQuantileCandidate(data.fuse_quantile);
|
||||
fuseCandidate(data.fuse_quantile);
|
||||
}
|
||||
|
||||
|
||||
/// Runs the sum / avg / count fusion over `query`: first collect the
/// candidate calls with FuseSumCountAggregatesVisitor, then rewrite them.
void optimizeFuseSumCountFunctions(ASTPtr & query)
{
    FuseSumCountAggregatesVisitor::Data gathered;

    FuseSumCountAggregatesVisitor collector(gathered);
    collector.visit(query);

    fuseSumCountAggregates(gathered.fuse_map);
}
|
||||
}
|
||||
|
||||
void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif)
|
||||
@ -848,15 +732,8 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
|
||||
/// Remove duplicated columns from USING(...).
|
||||
optimizeUsing(select_query);
|
||||
|
||||
if (settings.optimize_fuse_functions)
|
||||
{
|
||||
// Try to fuse sum/avg/count with identical arguments to one sumCount call,
|
||||
// if we have at least two different functions. E.g. we will replace sum(x)
|
||||
// and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will
|
||||
// be calculated only once because of CSE.
|
||||
optimizeFuseSumCountFunctions(query);
|
||||
if (settings.optimize_fuse_quantile)
|
||||
optimizeFuseQuantileFunctions(query);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -180,8 +180,72 @@ struct CustomizeAggregateFunctionsMoveSuffixData
|
||||
}
|
||||
};
|
||||
|
||||
struct FuseSumCountAggregates
|
||||
{
|
||||
std::vector<ASTFunction *> sums {};
|
||||
std::vector<ASTFunction *> counts {};
|
||||
std::vector<ASTFunction *> avgs {};
|
||||
|
||||
void addFuncNode(ASTFunction * func)
|
||||
{
|
||||
if (func->name == "sum")
|
||||
sums.push_back(func);
|
||||
else if (func->name == "count")
|
||||
counts.push_back(func);
|
||||
else
|
||||
{
|
||||
assert(func->name == "avg");
|
||||
avgs.push_back(func);
|
||||
}
|
||||
}
|
||||
|
||||
bool canBeFused() const
|
||||
{
|
||||
// Need at least two different kinds of functions to fuse.
|
||||
if (sums.empty() && counts.empty())
|
||||
return false;
|
||||
if (sums.empty() && avgs.empty())
|
||||
return false;
|
||||
if (counts.empty() && avgs.empty())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct FuseSumCountAggregatesVisitorData
|
||||
{
|
||||
using TypeToVisit = ASTFunction;
|
||||
|
||||
std::unordered_map<String, FuseSumCountAggregates> fuse_map;
|
||||
|
||||
void visit(ASTFunction & func, ASTPtr &)
|
||||
{
|
||||
if (func.name == "sum" || func.name == "avg" || func.name == "count")
|
||||
{
|
||||
if (func.arguments->children.empty())
|
||||
return;
|
||||
|
||||
// Probably we can extend it to match count() for non-nullable argument
|
||||
// to sum/avg with any other argument. Now we require strict match.
|
||||
const auto argument = func.arguments->children.at(0)->getColumnName();
|
||||
auto it = fuse_map.find(argument);
|
||||
if (it != fuse_map.end())
|
||||
{
|
||||
it->second.addFuncNode(&func);
|
||||
}
|
||||
else
|
||||
{
|
||||
FuseSumCountAggregates funcs{};
|
||||
funcs.addFuncNode(&func);
|
||||
fuse_map[argument] = funcs;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsSuffixData>, true>;
|
||||
using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsMoveSuffixData>, true>;
|
||||
/// Visitor type that feeds ASTFunction nodes to FuseSumCountAggregatesVisitorData::visit.
/// NOTE(review): the trailing `true` is presumably the traversal-order flag of InDepthNodeVisitor — confirm.
using FuseSumCountAggregatesVisitor = InDepthNodeVisitor<OneTypeMatcher<FuseSumCountAggregatesVisitorData>, true>;
|
||||
|
||||
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
|
||||
/// Expand asterisks and qualified asterisks with column names.
|
||||
@ -199,6 +263,49 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query
|
||||
throw Exception("Empty list of columns in SELECT query", ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED);
|
||||
}
|
||||
|
||||
// Replaces one avg/sum/count function with an appropriate expression with
|
||||
// sumCount().
|
||||
void replaceWithSumCount(String column_name, ASTFunction & func)
|
||||
{
|
||||
auto func_base = makeASTFunction("sumCount", std::make_shared<ASTIdentifier>(column_name));
|
||||
auto exp_list = std::make_shared<ASTExpressionList>();
|
||||
if (func.name == "sum" || func.name == "count")
|
||||
{
|
||||
/// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2
|
||||
UInt8 idx = (func.name == "sum" ? 1 : 2);
|
||||
func.name = "tupleElement";
|
||||
exp_list->children.push_back(func_base);
|
||||
exp_list->children.push_back(std::make_shared<ASTLiteral>(idx));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Rewrite "avg" to sumCount().1 / sumCount().2
|
||||
auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(1)));
|
||||
auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(2)));
|
||||
func.name = "divide";
|
||||
exp_list->children.push_back(new_arg1);
|
||||
exp_list->children.push_back(new_arg2);
|
||||
}
|
||||
func.arguments = exp_list;
|
||||
func.children.push_back(func.arguments);
|
||||
}
|
||||
|
||||
void fuseSumCountAggregates(std::unordered_map<String, FuseSumCountAggregates> & fuse_map)
|
||||
{
|
||||
for (auto & it : fuse_map)
|
||||
{
|
||||
if (it.second.canBeFused())
|
||||
{
|
||||
for (auto & func: it.second.sums)
|
||||
replaceWithSumCount(it.first, *func);
|
||||
for (auto & func: it.second.avgs)
|
||||
replaceWithSumCount(it.first, *func);
|
||||
for (auto & func: it.second.counts)
|
||||
replaceWithSumCount(it.first, *func);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool hasArrayJoin(const ASTPtr & ast)
|
||||
{
|
||||
if (const ASTFunction * function = ast->as<ASTFunction>())
|
||||
@ -926,6 +1033,17 @@ void TreeRewriter::normalize(
|
||||
CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query);
|
||||
}
|
||||
|
||||
// Try to fuse sum/avg/count with identical arguments to one sumCount call,
|
||||
// if we have at least two different functions. E.g. we will replace sum(x)
|
||||
// and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will
|
||||
// be calculated only once because of CSE.
|
||||
if (settings.optimize_fuse_sum_count_avg)
|
||||
{
|
||||
FuseSumCountAggregatesVisitor::Data data;
|
||||
FuseSumCountAggregatesVisitor(data).visit(query);
|
||||
fuseSumCountAggregates(data.fuse_map);
|
||||
}
|
||||
|
||||
/// Rewrite all aggregate functions to add -OrNull suffix to them
|
||||
if (settings.aggregate_functions_null_for_empty)
|
||||
{
|
||||
|
@ -2,7 +2,7 @@ DROP TABLE IF EXISTS fuse_tbl;
|
||||
CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log;
|
||||
INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20);
|
||||
|
||||
SET optimize_fuse_functions= 1;
|
||||
SET optimize_fuse_sum_count_avg = 1;
|
||||
SELECT sum(a), sum(b), count(b) from fuse_tbl;
|
||||
EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl;
|
||||
SELECT '---------NOT trigger fuse--------';
|
||||
|
@ -14,7 +14,7 @@ SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)
|
||||
|
||||
|
||||
SELECT '---------After fuse result-----------';
|
||||
set optimize_fuse_functions=true;
|
||||
set optimize_fuse_quantile=true;
|
||||
SELECT 'quantile:';
|
||||
EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime;
|
||||
SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime;
|
||||
|
Loading…
Reference in New Issue
Block a user