fix aliases in partition by/order by

This commit is contained in:
Alexander Kuzmenkov 2020-12-28 12:56:38 +03:00
parent a17f0b50ad
commit 2905f70cce
10 changed files with 93 additions and 61 deletions

View File

@ -405,8 +405,8 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
if (fn->is_window_function)
{
fuzzColumnLikeExpressionList(fn->window_partition_by);
fuzzOrderByList(fn->window_order_by);
fuzzColumnLikeExpressionList(fn->window_partition_by.get());
fuzzOrderByList(fn->window_order_by.get());
}
fuzz(fn->children);

View File

@ -738,15 +738,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (node.is_window_function)
{
// Also add columns from PARTITION BY and ORDER BY of window functions.
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
if (node.window_partition_by)
{
visit(node.window_partition_by->clone(), data);
visit(node.window_partition_by, data);
}
if (node.window_order_by)
{
visit(node.window_order_by->clone(), data);
visit(node.window_order_by, data);
}
// Also manually add columns for arguments of the window function itself.

View File

@ -983,36 +983,14 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
getRootActionsNoMakeSet(f.function_node->clone(),
true /* no_subqueries */, step.actions());
// FIXME rewrite this comment
// 1.2) result of window function: an empty INPUT.
// It is an aggregate function, so it won't be added by getRootActions.
// This is something of a hack. Other options:
// a] do it like aggregate function -- break the chain of actions
// and manually add window functions to the starting list of
// input columns. Logically this is similar to what we're doing
// now, but would require to split the window function processing
// into a full-fledged step after plain functions. This would be
// somewhat cumbersome. With INPUT hack we can avoid a separate
// step and pretend that window functions are almost "normal"
// select functions. The limitation of both these ways is that
// we can't reference window functions in other SELECT
// expressions.
// b] add a WINDOW action type, then sort, then split the chain on
// each WINDOW action and insert the Window pipeline between the
// Expression pipelines. This is a "proper" way that would allow
// us to depend on window functions in other functions. But it's
// complicated so I avoid doing it for now.
columns_after_window.push_back({f.column_name,
f.aggregate_function->getReturnType()});
// 2.1) function arguments;
for (const auto & a : f.function_node->arguments->children)
{
// 2.1) function arguments;
step.required_output.push_back(a->getColumnName());
}
}
// 2.3) PARTITION BY and ORDER BY columns.
// 2.1) PARTITION BY and ORDER BY columns.
for (const auto & c : w.full_sort_description)
{
step.required_output.push_back(c.column_name);
@ -1433,11 +1411,23 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
{
query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage);
// Build a list of output columns of the window step.
// 1) We need the columns that are the output of ExpressionActions.
for (const auto & x : chain.getLastActions()->getNamesAndTypesList())
{
query_analyzer.columns_after_window.push_back(x);
}
// 2) We also have to manually add the output of the window function
// to the list of the output columns of the window step, because the
// window functions are not in the ExpressionActions.
for (const auto & [_, w] : query_analyzer.window_descriptions)
{
for (const auto & f : w.window_functions)
{
query_analyzer.columns_after_window.push_back(
{f.column_name, f.aggregate_function->getReturnType()});
}
}
before_window = chain.getLastActions();
finalize_chain(chain);

View File

@ -148,7 +148,7 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data
/// Don't go into select query. It processes children itself.
/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
void QueryNormalizer::visitChildren(ASTPtr & node, Data & data)
void QueryNormalizer::visitChildren(IAST * node, Data & data)
{
if (auto * func_node = node->as<ASTFunction>())
{
@ -159,27 +159,33 @@ void QueryNormalizer::visitChildren(ASTPtr & node, Data & data)
/// Don't go into query argument.
return;
}
/// We skip the first argument. We also assume that the lambda function can not have parameters.
size_t first_pos = 0;
if (func_node->name == "lambda")
first_pos = 1;
// Process all function node children (arguments + window definition for
// window functions).
// We have to skip the first argument if it's the "lambda" function,
// because it contains formal parameters like x->x + 1;
const IAST * argument_to_skip = nullptr;
if (func_node->name == "lambda"
&& func_node->arguments
&& !func_node->arguments->children.empty())
if (func_node->arguments)
{
argument_to_skip = func_node->arguments->children[0].get();
}
auto & func_children = func_node->arguments->children;
for (auto & child : func_node->children)
{
if (child.get() != argument_to_skip
&& needVisitChild(child))
for (size_t i = first_pos; i < func_children.size(); ++i)
{
visit(child, data);
auto & child = func_children[i];
if (needVisitChild(child))
visit(child, data);
}
}
if (func_node->window_partition_by)
{
visitChildren(func_node->window_partition_by.get(), data);
}
if (func_node->window_order_by)
{
visitChildren(func_node->window_order_by.get(), data);
}
}
else if (!node->as<ASTSelectQuery>())
{
@ -225,7 +231,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data)
if (ast.get() != initial_ast.get())
visit(ast, data);
else
visitChildren(ast, data);
visitChildren(ast.get(), data);
current_asts.erase(initial_ast.get());
current_asts.erase(ast.get());

View File

@ -69,7 +69,7 @@ private:
static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &);
static void visit(ASTSelectQuery &, const ASTPtr &, Data &);
static void visitChildren(ASTPtr & node, Data & data);
static void visitChildren(IAST * node, Data & data);
};
}

View File

@ -67,17 +67,20 @@ ASTPtr ASTFunction::clone() const
if (window_name)
{
res->set(res->window_name, window_name->clone());
res->window_name = window_name->clone();
res->children.push_back(res->window_name);
}
if (window_partition_by)
{
res->set(res->window_partition_by, window_partition_by->clone());
res->window_partition_by = window_partition_by->clone();
res->children.push_back(res->window_partition_by);
}
if (window_order_by)
{
res->set(res->window_order_by, window_order_by->clone());
res->window_order_by = window_order_by->clone();
res->children.push_back(res->window_order_by);
}
return res;

View File

@ -21,9 +21,25 @@ public:
ASTPtr parameters;
bool is_window_function = false;
ASTIdentifier * window_name;
ASTExpressionList * window_partition_by;
ASTExpressionList * window_order_by;
// We have to make these fields ASTPtr because this is what the visitors
// expect. Some of them take const ASTPtr & (makes no sense), and some
// take ASTPtr & and modify it. I don't understand how the latter is
// compatible with also having an owning `children` array -- apparently it
// leads to some dangling children that are not referenced by the fields of
// the AST class itself. Some older code hints at the idea of having
// ownership in `children` only, and making the class fields to be raw
// pointers of proper type (see e.g. IAST::set), but this is not compatible
// with the visitor interface.
// ASTIdentifier
ASTPtr window_name;
// ASTExpressionList
ASTPtr window_partition_by;
// ASTExpressionList of
ASTPtr window_order_by;
/// do not print empty parentheses if there are no args - compatibility with new AST for data types and engine names.
bool no_empty_args = false;

View File

@ -419,7 +419,8 @@ bool ParserWindowDefinition::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
ParserIdentifier window_name_parser;
if (window_name_parser.parse(pos, window_name_ast, expected))
{
function->set(function->window_name, window_name_ast);
function->children.push_back(window_name_ast);
function->window_name = window_name_ast;
return true;
}
else
@ -442,7 +443,8 @@ bool ParserWindowDefinition::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
ASTPtr partition_by_ast;
if (columns_partition_by.parse(pos, partition_by_ast, expected))
{
function->set(function->window_partition_by, partition_by_ast);
function->children.push_back(partition_by_ast);
function->window_partition_by = partition_by_ast;
}
else
{
@ -455,7 +457,8 @@ bool ParserWindowDefinition::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
ASTPtr order_by_ast;
if (columns_order_by.parse(pos, order_by_ast, expected))
{
function->set(function->window_order_by, order_by_ast);
function->children.push_back(order_by_ast);
function->window_order_by = order_by_ast;
}
else
{

View File

@ -122,14 +122,14 @@ select * from (select * from numbers(5) order by rand()) order by count() over (
2
3
4
select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number)) over (order by min(number) desc) desc;
select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc;
-- different windows
-- an explain test would also be helpful, but it's too immature now and I don't
-- want to change reference all the time
1
0
1
2
3
4
@ -204,4 +204,16 @@ select distinct sum(0) over () from numbers(2);
0
select distinct any(number) over () from numbers(2);
-- Various kinds of aliases are properly substituted into various parts of window
-- function definition.
0
with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7);
0 1
0 3
0 6
1 5
1 11
1 18
2 9

View File

@ -41,7 +41,7 @@ select * from (select * from numbers(5) order by rand()) order by count() over (
-- Aggregate functions as window function arguments. This query is semantically
-- the same as the above one, only we replace `number` with
-- `any(number) group by number` and so on.
select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number)) over (order by min(number) desc) desc;
select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc;
-- different windows
-- an explain test would also be helpful, but it's too immature now and I don't
@ -66,3 +66,7 @@ select count(1) over (), max(number + 1) over () from numbers(3);
-- Should work in DISTINCT
select distinct sum(0) over () from numbers(2);
select distinct any(number) over () from numbers(2);
-- Various kinds of aliases are properly substituted into various parts of window
-- function definition.
with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7);