ClickHouse/src/Interpreters/RewriteAnyFunctionVisitor.cpp

126 lines
4.3 KiB
C++
Raw Normal View History

#include <Common/typeid_cast.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSubquery.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
namespace
{
bool extractIdentifiers(const ASTFunction & func, std::unordered_set<ASTPtr *> & identifiers)
{
for (auto & arg : func.arguments->children)
{
if (const auto * arg_func = arg->as<ASTFunction>())
{
/// arrayJoin() is special and should not be optimized (think about
/// it as a an aggregate function), otherwise wrong result will be
/// produced:
/// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
/// ┌─number─┬─arrayJoin(array(array(), array()))─┐
/// │ 0 │ [] │
/// │ 0 │ [] │
/// └────────┴────────────────────────────────────┘
/// While should be:
/// ┌─number─┬─any(arrayJoin(array(array(), array())))─┐
/// │ 0 │ [] │
/// └────────┴─────────────────────────────────────────┘
if (arg_func->name == "arrayJoin")
return false;
if (arg_func->name == "lambda")
return false;
2020-12-18 17:13:28 +00:00
// We are looking for identifiers inside a function calculated inside
// the aggregate function `any()`. Window or aggregate function can't
2020-12-21 09:59:11 +00:00
// be inside `any`, but this check in GetAggregatesMatcher happens
// later, so we have to explicitly skip these nested functions here.
2020-12-18 17:13:28 +00:00
if (arg_func->is_window_function
2020-12-16 21:44:05 +00:00
|| AggregateFunctionFactory::instance().isAggregateFunctionName(
arg_func->name))
{
2020-12-21 09:59:11 +00:00
return false;
2020-12-16 21:44:05 +00:00
}
if (!extractIdentifiers(*arg_func, identifiers))
return false;
}
else if (arg->as<ASTIdentifier>())
identifiers.emplace(&arg);
}
return true;
}
}
/// Visitor entry point for an arbitrary AST node.
///
/// Only ASTFunction nodes that are not window functions are forwarded to the
/// function-specific overload; every other node is left untouched.
///
/// @param ast   Slot holding the node being visited; may be replaced by the overload.
/// @param data  Visitor state passed through to the overload.
void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * func = ast->as<ASTFunction>();
    if (!func)
        return;

    /// Window functions are never rewritten.
    if (func->is_window_function)
        return;

    visit(*func, ast, data);
}
/// Rewrites any(f(x, y, g(z))) into f(any(x), any(y), g(any(z))); the same for anyLast().
///
/// The rewrite is applied only when `func` is `any` or `anyLast` with exactly one argument,
/// that argument is itself a function with a non-empty argument list, and
/// extractIdentifiers() accepts the argument tree.
///
/// @param func  The function node currently stored in `ast`.
/// @param ast   Slot holding `func`; on success it is replaced by the (rewritten) argument.
/// @param data  Visitor state; `data.rewritten` remembers nodes that were already processed.
void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
{
    if (!func.arguments || func.arguments->children.empty() || !func.arguments->children[0])
        return;

    if (func.name != "any" && func.name != "anyLast")
        return;

    auto & func_arguments = func.arguments->children;

    if (func_arguments.size() != 1)
        return;

    /// The nested function's argument list is checked for null as well, mirroring the
    /// check done for the outer function above.
    const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
    if (!first_arg_func || !first_arg_func->arguments || first_arg_func->arguments->children.empty())
        return;

    /// Replaces any(expr) in `ast` with expr itself, carrying over the alias of any().
    /// Must be the last thing done with `func`: once `ast` is reassigned, `func` may be destroyed.
    const auto unwrap = [&]
    {
        func_arguments[0]->setAlias(func.alias);
        ast = func_arguments[0];
    };

    /// We have rewritten this function. Just unwrap its argument.
    if (data.rewritten.contains(ast.get()))
    {
        unwrap();
        return;
    }

    std::unordered_set<ASTPtr *> identifiers; /// implicit remove duplicates
    if (!extractIdentifiers(func, identifiers))
        return;

    /// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z))))
    for (auto * ast_to_change : identifiers)
    {
        ASTPtr identifier_ast = *ast_to_change;
        *ast_to_change = makeASTFunction(func.name);
        (*ast_to_change)->as<ASTFunction>()->arguments->children.emplace_back(identifier_ast);
    }

    /// Remember the node before `ast` is overwritten by the unwrap below.
    data.rewritten.insert(ast.get());

    /// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z)))
    unwrap();
}
/// Returns false when `node` is a subquery, a table expression or an ARRAY JOIN clause,
/// and true for every other node. The second argument is ignored.
bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
    const bool is_excluded_node
        = node->as<ASTSubquery>()
        || node->as<ASTTableExpression>()
        || node->as<ASTArrayJoin>();

    return !is_excluded_node;
}
}