mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
118e94523c
This check suggests replacing <Container>.count() with <Container>.contains(), which is more expressive and, for multimaps/multisets, also faster.
126 lines
4.3 KiB
C++
126 lines
4.3 KiB
C++
#include <Common/typeid_cast.h>
|
|
#include <Parsers/ASTFunction.h>
|
|
#include <Parsers/ASTIdentifier.h>
|
|
#include <Parsers/ASTSubquery.h>
|
|
#include <Interpreters/RewriteAnyFunctionVisitor.h>
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace
|
|
{
|
|
|
|
bool extractIdentifiers(const ASTFunction & func, std::unordered_set<ASTPtr *> & identifiers)
|
|
{
|
|
for (auto & arg : func.arguments->children)
|
|
{
|
|
if (const auto * arg_func = arg->as<ASTFunction>())
|
|
{
|
|
/// arrayJoin() is special and should not be optimized (think about
|
|
/// it as a an aggregate function), otherwise wrong result will be
|
|
/// produced:
|
|
/// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
|
|
/// ┌─number─┬─arrayJoin(array(array(), array()))─┐
|
|
/// │ 0 │ [] │
|
|
/// │ 0 │ [] │
|
|
/// └────────┴────────────────────────────────────┘
|
|
/// While should be:
|
|
/// ┌─number─┬─any(arrayJoin(array(array(), array())))─┐
|
|
/// │ 0 │ [] │
|
|
/// └────────┴─────────────────────────────────────────┘
|
|
if (arg_func->name == "arrayJoin")
|
|
return false;
|
|
|
|
if (arg_func->name == "lambda")
|
|
return false;
|
|
|
|
// We are looking for identifiers inside a function calculated inside
|
|
// the aggregate function `any()`. Window or aggregate function can't
|
|
// be inside `any`, but this check in GetAggregatesMatcher happens
|
|
// later, so we have to explicitly skip these nested functions here.
|
|
if (arg_func->is_window_function
|
|
|| AggregateFunctionFactory::instance().isAggregateFunctionName(
|
|
arg_func->name))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (!extractIdentifiers(*arg_func, identifiers))
|
|
return false;
|
|
}
|
|
else if (arg->as<ASTIdentifier>())
|
|
identifiers.emplace(&arg);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
/// Visitor entry point: forwards `ast` to the ASTFunction overload when the node
/// is a function that is not a window function; every other node is left as is.
void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * function_node = ast->as<ASTFunction>();

    /// Not a function at all, or a window function: nothing to rewrite.
    if (!function_node || function_node->is_window_function)
        return;

    visit(*function_node, ast, data);
}
|
|
|
|
void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
|
|
{
|
|
if (!func.arguments || func.arguments->children.empty() || !func.arguments->children[0])
|
|
return;
|
|
|
|
if (func.name != "any" && func.name != "anyLast")
|
|
return;
|
|
|
|
auto & func_arguments = func.arguments->children;
|
|
|
|
if (func_arguments.size() != 1)
|
|
return;
|
|
|
|
const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
|
|
if (!first_arg_func || first_arg_func->arguments->children.empty())
|
|
return;
|
|
|
|
/// We have rewritten this function. Just unwrap its argument.
|
|
if (data.rewritten.contains(ast.get()))
|
|
{
|
|
func_arguments[0]->setAlias(func.alias);
|
|
ast = func_arguments[0];
|
|
return;
|
|
}
|
|
|
|
std::unordered_set<ASTPtr *> identifiers; /// implicit remove duplicates
|
|
if (!extractIdentifiers(func, identifiers))
|
|
return;
|
|
|
|
/// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z))))
|
|
for (auto * ast_to_change : identifiers)
|
|
{
|
|
ASTPtr identifier_ast = *ast_to_change;
|
|
*ast_to_change = makeASTFunction(func.name);
|
|
(*ast_to_change)->as<ASTFunction>()->arguments->children.emplace_back(identifier_ast);
|
|
}
|
|
|
|
data.rewritten.insert(ast.get());
|
|
|
|
/// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z)))
|
|
func_arguments[0]->setAlias(func.alias);
|
|
ast = func_arguments[0];
|
|
}
|
|
|
|
bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
|
|
{
|
|
return !node->as<ASTSubquery>() &&
|
|
!node->as<ASTTableExpression>() &&
|
|
!node->as<ASTArrayJoin>();
|
|
}
|
|
|
|
}
|