Mark all input columns in LIMIT BY as required output

The query analyzer marks only the actual arguments of LIMIT BY as required
output for the LimitBy step in the pipeline. This is fine unless the query is
distributed, in which case the first stage might remove a column that the
second stage still needs (e.g. for ORDER BY) but that is not part of the final
SELECT list.

Prevent the LimitBy step from removing any columns by marking all of its input
columns as required output, in addition to the LIMIT BY arguments.
This commit is contained in:
Constantin S. Pan 2019-05-23 17:25:53 +01:00
parent 15e31d5a8c
commit cbc0f56af1
3 changed files with 13 additions and 1 deletions

View File

@ -849,7 +849,15 @@ bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only
getRootActions(select_query->limitBy(), only_types, step.actions);
NameSet aggregated_names;
for (const auto & column : aggregated_columns)
{
step.required_output.push_back(column.name);
aggregated_names.insert(column.name);
}
for (const auto & child : select_query->limitBy()->children)
if (!aggregated_names.count(child->getColumnName()))
step.required_output.push_back(child->getColumnName());
return true;

View File

@ -24,3 +24,4 @@
101
101
102
1

View File

@ -32,4 +32,7 @@ SELECT 1 as one FROM remote('127.0.0.{2,3}', system.one) LIMIT 1 BY one;
-- Distributed LIMIT BY with LIMIT
SELECT toInt8(number / 5 + 100) AS x FROM remote('127.0.0.1', system.numbers) LIMIT 2 BY x LIMIT 5;
-- Distributed LIMIT BY with ORDER BY non-selected column
SELECT 1 AS x FROM remote('127.0.0.{2,3}', system.one) ORDER BY dummy LIMIT 1 BY x;
DROP TABLE IF EXISTS limit_by;