mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
fix DISTINCT and ORDER BY
This commit is contained in:
parent
8c72fa063d
commit
5bd025a180
@ -970,7 +970,9 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
|
||||
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
|
||||
|
||||
// 1) Add actions for window functions and their arguments;
|
||||
// 2) Mark the columns that are really required.
|
||||
// 2) Mark the columns that are really required. We have to mark them as
|
||||
// required because we finish the expression chain before processing the
|
||||
// window functions.
|
||||
for (const auto & [_, w] : window_descriptions)
|
||||
{
|
||||
for (const auto & f : w.window_functions)
|
||||
@ -1425,49 +1427,47 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
/// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
|
||||
query_analyzer.appendSelect(chain, only_types || (need_aggregate ? !second_stage : !first_stage));
|
||||
|
||||
has_order_by = query.orderBy() != nullptr;
|
||||
// FIXME selected columns was set here. Should split order by and select
|
||||
// and insert window functions in between.
|
||||
// Will fix:
|
||||
// 1) window function in DISTINCT
|
||||
// 2) totally broken DISTINCT
|
||||
// 3) window functions in order by
|
||||
before_order_and_select = query_analyzer.appendOrderBy(
|
||||
chain,
|
||||
only_types || (need_aggregate ? !second_stage : !first_stage),
|
||||
optimize_read_in_order,
|
||||
order_by_elements_actions);
|
||||
|
||||
// Window functions are processed in a separate expression chain after
|
||||
// the main SELECT, similar to what we do for aggregate functions.
|
||||
if (has_window)
|
||||
{
|
||||
query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage);
|
||||
|
||||
auto select_required_columns = chain.getLastStep().required_output;
|
||||
|
||||
for (const auto & x : chain.getLastActions()->getNamesAndTypesList())
|
||||
{
|
||||
query_analyzer.columns_after_window.push_back(x);
|
||||
}
|
||||
|
||||
before_window = chain.getLastActions();
|
||||
finalize_chain(chain);
|
||||
|
||||
auto & step = chain.lastStep(query_analyzer.columns_after_window);
|
||||
step.required_output = select_required_columns;
|
||||
|
||||
// The output of this expression chain is the result of
|
||||
// SELECT (before "final projection" i.e. renaming the columns), so
|
||||
// we have to mark the expressions that are required in the output,
|
||||
// again. We did it for the previous expression chain ("select w/o
|
||||
// window functions") earlier, in appendSelect(). But that chain also
|
||||
// produced the expressions required to calculate window functions.
|
||||
// They are not needed in the final SELECT result. Knowing the correct
|
||||
// list of columns is important when we apply SELECT DISTINCT later.
|
||||
const auto * select_query = query_analyzer.getSelectQuery();
|
||||
for (const auto & child : select_query->select()->children)
|
||||
{
|
||||
if (const auto * function
|
||||
= typeid_cast<const ASTFunction *>(child.get());
|
||||
function && function->is_window_function)
|
||||
{
|
||||
// See its counterpart in appendSelect()
|
||||
step.required_output.push_back(child->getColumnName());
|
||||
}
|
||||
step.required_output.push_back(child->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
selected_columns = chain.getLastStep().required_output;
|
||||
|
||||
has_order_by = query.orderBy() != nullptr;
|
||||
before_order_by = query_analyzer.appendOrderBy(
|
||||
chain,
|
||||
only_types || (need_aggregate ? !second_stage : !first_stage),
|
||||
optimize_read_in_order,
|
||||
order_by_elements_actions);
|
||||
|
||||
if (query_analyzer.appendLimitBy(chain, only_types || !second_stage))
|
||||
{
|
||||
before_limit_by = chain.getLastActions();
|
||||
@ -1479,7 +1479,6 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
finalize_chain(chain);
|
||||
}
|
||||
|
||||
|
||||
/// Before executing WHERE and HAVING, remove the extra columns from the block (mostly the aggregation keys).
|
||||
removeExtraColumns();
|
||||
|
||||
@ -1610,9 +1609,9 @@ std::string ExpressionAnalysisResult::dump() const
|
||||
ss << "before_window " << before_window->dumpDAG() << "\n";
|
||||
}
|
||||
|
||||
if (before_order_and_select)
|
||||
if (before_order_by)
|
||||
{
|
||||
ss << "before_order_and_select " << before_order_and_select->dumpDAG() << "\n";
|
||||
ss << "before_order_by " << before_order_by->dumpDAG() << "\n";
|
||||
}
|
||||
|
||||
if (before_limit_by)
|
||||
@ -1625,6 +1624,20 @@ std::string ExpressionAnalysisResult::dump() const
|
||||
ss << "final_projection " << final_projection->dumpDAG() << "\n";
|
||||
}
|
||||
|
||||
if (!selected_columns.empty())
|
||||
{
|
||||
ss << "selected_columns ";
|
||||
for (size_t i = 0; i < selected_columns.size(); i++)
|
||||
{
|
||||
if (i > 0)
|
||||
{
|
||||
ss << ", ";
|
||||
}
|
||||
ss << backQuote(selected_columns[i]);
|
||||
}
|
||||
ss << "\n";
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
@ -204,11 +204,12 @@ struct ExpressionAnalysisResult
|
||||
ActionsDAGPtr before_aggregation;
|
||||
ActionsDAGPtr before_having;
|
||||
ActionsDAGPtr before_window;
|
||||
ActionsDAGPtr before_order_and_select;
|
||||
ActionsDAGPtr before_order_by;
|
||||
ActionsDAGPtr before_limit_by;
|
||||
ActionsDAGPtr final_projection;
|
||||
|
||||
/// Columns from the SELECT list, before renaming them to aliases.
|
||||
/// Columns from the SELECT list, before renaming them to aliases. Used to
|
||||
/// perform SELECT DISTINCT.
|
||||
Names selected_columns;
|
||||
|
||||
/// Columns will be removed after prewhere actions execution.
|
||||
|
@ -538,7 +538,10 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
|
||||
if (options.to_stage == QueryProcessingStage::Enum::WithMergeableState)
|
||||
{
|
||||
if (!analysis_result.need_aggregate)
|
||||
return analysis_result.before_order_and_select->getResultColumns();
|
||||
{
|
||||
// What's the difference with selected_columns?
|
||||
return analysis_result.before_order_by->getResultColumns();
|
||||
}
|
||||
|
||||
Block header = analysis_result.before_aggregation->getResultColumns();
|
||||
|
||||
@ -564,7 +567,8 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
|
||||
|
||||
if (options.to_stage == QueryProcessingStage::Enum::WithMergeableStateAfterAggregation)
|
||||
{
|
||||
return analysis_result.before_order_and_select->getResultColumns();
|
||||
// What's the difference with selected_columns?
|
||||
return analysis_result.before_order_by->getResultColumns();
|
||||
}
|
||||
|
||||
return analysis_result.final_projection->getResultColumns();
|
||||
@ -958,8 +962,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
}
|
||||
else
|
||||
{
|
||||
executeExpression(query_plan, expressions.before_order_and_select, "Before ORDER BY and SELECT");
|
||||
executeExpression(query_plan, expressions.before_window, "Before window functions");
|
||||
executeWindow(query_plan);
|
||||
executeExpression(query_plan, expressions.before_order_by, "Before ORDER BY");
|
||||
executeDistinct(query_plan, true, expressions.selected_columns, true);
|
||||
}
|
||||
|
||||
@ -1005,8 +1010,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
else if (expressions.hasHaving())
|
||||
executeHaving(query_plan, expressions.before_having);
|
||||
|
||||
executeExpression(query_plan, expressions.before_order_and_select, "Before ORDER BY and SELECT");
|
||||
executeExpression(query_plan, expressions.before_window,
|
||||
"Before window functions");
|
||||
executeWindow(query_plan);
|
||||
executeExpression(query_plan, expressions.before_order_by, "Before ORDER BY");
|
||||
executeDistinct(query_plan, true, expressions.selected_columns, true);
|
||||
|
||||
}
|
||||
@ -1745,6 +1752,11 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific
|
||||
|
||||
void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description)
|
||||
{
|
||||
if (!expression)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), expression);
|
||||
|
||||
expression_step->setStepDescription(description);
|
||||
|
@ -79,12 +79,22 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) q fro
|
||||
9 9
|
||||
select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 }
|
||||
|
||||
-- should work in ORDER BY though
|
||||
-- doesn't work now
|
||||
-- select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number;
|
||||
-- should work in ORDER BY
|
||||
|
||||
-- at least it works in ORDER BY if you wrap it in a subquery
|
||||
select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number;
|
||||
|
||||
-- also works in ORDER BY if you wrap it in a subquery
|
||||
|
||||
9 9
|
||||
6 8
|
||||
7 8
|
||||
8 8
|
||||
3 5
|
||||
4 5
|
||||
5 5
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
select * from (select count(*) over () c from numbers(3)) order by c;
|
||||
|
||||
-- must work in WHERE if you wrap it in a subquery
|
||||
@ -168,4 +178,12 @@ select groupArray(number) over () from numbers(3);
|
||||
[0,1,2]
|
||||
select count(1) over (), max(number + 1) over () from numbers(3);
|
||||
|
||||
-- Should work in DISTINCT
|
||||
|
||||
1 3
|
||||
select distinct sum(0) over () from numbers(2);
|
||||
|
||||
0
|
||||
select distinct any(number) over () from numbers(2);
|
||||
|
||||
0
|
||||
|
@ -24,11 +24,10 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) q fro
|
||||
-- last stage of select, after all other functions.
|
||||
select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 }
|
||||
|
||||
-- should work in ORDER BY though
|
||||
-- doesn't work now
|
||||
-- select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number;
|
||||
-- should work in ORDER BY
|
||||
select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number;
|
||||
|
||||
-- at least it works in ORDER BY if you wrap it in a subquery
|
||||
-- also works in ORDER BY if you wrap it in a subquery
|
||||
select * from (select count(*) over () c from numbers(3)) order by c;
|
||||
|
||||
-- must work in WHERE if you wrap it in a subquery
|
||||
@ -57,3 +56,7 @@ select groupArray(number) over () from numbers(3);
|
||||
-- This one tests we properly process the window function arguments.
|
||||
-- Seen errors like 'column `1` not found' from count(1).
|
||||
select count(1) over (), max(number + 1) over () from numbers(3);
|
||||
|
||||
-- Should work in DISTINCT
|
||||
select distinct sum(0) over () from numbers(2);
|
||||
select distinct any(number) over () from numbers(2);
|
||||
|
Loading…
Reference in New Issue
Block a user