mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-12 01:12:12 +00:00
Respect stateful functions in the optimization
This commit is contained in:
parent
49694578ba
commit
c2b10e8dba
@ -24,16 +24,51 @@ void printStepName(const char * prefix, const QueryPlan::Node * node)
|
||||
LOG_DEBUG(&Poco::Logger::get("RedundantOrderBy"), "{}: {}: {}", prefix, stepName(node), reinterpret_cast<void *>(node->step.get()));
|
||||
}
|
||||
|
||||
void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
struct FrameWithParent
|
||||
{
|
||||
struct Frame
|
||||
{
|
||||
QueryPlan::Node * node = nullptr;
|
||||
QueryPlan::Node * parent_node = nullptr;
|
||||
size_t next_child = 0;
|
||||
};
|
||||
};
|
||||
|
||||
std::vector<Frame> stack;
|
||||
using StackWithParent = std::vector<FrameWithParent>;
|
||||
|
||||
bool checkForStatefulFunctions(std::vector<FrameWithParent> const & stack, const QueryPlan::Node * node_affect_order)
|
||||
{
|
||||
chassert(!stack.empty());
|
||||
chassert(typeid_cast<const SortingStep *>(stack.back().node->step.get()));
|
||||
|
||||
/// skip element on top of stack since it's sorting
|
||||
for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
|
||||
{
|
||||
const auto * node = it->node;
|
||||
if (node == node_affect_order)
|
||||
break;
|
||||
|
||||
const auto * step = node->step.get();
|
||||
|
||||
const auto * expr = typeid_cast<const ExpressionStep *>(step);
|
||||
if (expr)
|
||||
{
|
||||
if (expr->getExpression()->hasStatefulFunctions())
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto * trans = typeid_cast<const ITransformingStep *>(step);
|
||||
if (!trans)
|
||||
break;
|
||||
|
||||
if (!trans->getDataStreamTraits().preserves_sorting)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
{
|
||||
StackWithParent stack;
|
||||
stack.push_back({.node = root});
|
||||
|
||||
std::vector<QueryPlan::Node *> nodes_affect_order;
|
||||
@ -44,7 +79,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
|
||||
QueryPlan::Node * current_node = frame.node;
|
||||
QueryPlan::Node * parent_node = frame.parent_node;
|
||||
IQueryPlanStep * current_step = frame.node->step.get();
|
||||
IQueryPlanStep * current_step = current_node->step.get();
|
||||
printStepName("back", current_node);
|
||||
|
||||
/// top-down visit
|
||||
@ -66,7 +101,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
return false;
|
||||
|
||||
bool remove_sorting = false;
|
||||
/// TODO: (0) check stateful functions
|
||||
|
||||
/// (1) aggregation
|
||||
if (const AggregatingStep * parent_aggr = typeid_cast<AggregatingStep *>(step_affect_order); parent_aggr)
|
||||
{
|
||||
@ -88,6 +123,12 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
|
||||
if (remove_sorting)
|
||||
{
|
||||
/// if there is expression with stateful function between current step
|
||||
/// and step which affects order, then we need to keep sorting since
|
||||
/// stateful function output can depend on order
|
||||
if (checkForStatefulFunctions(stack, node_affect_order))
|
||||
return false;
|
||||
|
||||
chassert(typeid_cast<ExpressionStep *>(current_node->children.front()->step.get()));
|
||||
chassert(!current_node->children.front()->children.empty());
|
||||
|
||||
@ -104,7 +145,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
frame.next_child = frame.node->children.size();
|
||||
|
||||
/// current sorting step has been removed from plan, its parent has new children, need to visit them
|
||||
auto next_frame = Frame{.node = parent_node->children[0], .parent_node = parent_node};
|
||||
auto next_frame = FrameWithParent{.node = parent_node->children[0], .parent_node = parent_node};
|
||||
++frame.next_child;
|
||||
printStepName("push", next_frame.node);
|
||||
stack.push_back(next_frame);
|
||||
@ -126,7 +167,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
/// Traverse all children
|
||||
if (frame.next_child < frame.node->children.size())
|
||||
{
|
||||
auto next_frame = Frame{.node = current_node->children[frame.next_child], .parent_node = current_node};
|
||||
auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node};
|
||||
++frame.next_child;
|
||||
printStepName("push", next_frame.node);
|
||||
stack.push_back(next_frame);
|
||||
|
@ -447,10 +447,11 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
|
||||
|
||||
void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings)
|
||||
{
|
||||
QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
|
||||
|
||||
if (optimization_settings.remove_redundant_order_by)
|
||||
QueryPlanOptimizations::tryRemoveRedundantOrderBy(root);
|
||||
|
||||
QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
|
||||
QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes);
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,7 @@ Expression (Project names)
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_0 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
|
||||
Header: number_0 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -37,7 +37,7 @@ Header: number UInt64
|
||||
Header: number_1 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_1 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
|
||||
Header: number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -54,7 +54,7 @@ Header: number UInt64
|
||||
Header: number_1 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_1 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
|
||||
Header: number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -69,7 +69,7 @@ Header: t1.number UInt64
|
||||
Header: t1.number_0 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_2 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
|
||||
Header: number_2 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -77,7 +77,7 @@ Header: t1.number UInt64
|
||||
Header: t2.number_1 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_4 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
|
||||
Header: number_4 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -94,11 +94,11 @@ Header: t1.number UInt64
|
||||
Join (JOIN FillRightFirst)
|
||||
Header: t1.number_0 UInt64
|
||||
t2.number_1 UInt64
|
||||
Expression ((Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))
|
||||
Expression ((Change column names to column identifiers + Project names))
|
||||
Header: t1.number_0 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
Expression ((Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))
|
||||
Expression ((Change column names to column identifiers + Project names))
|
||||
Header: t2.number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -108,7 +108,7 @@ Header: sum(number) UInt64
|
||||
Aggregating
|
||||
Header: number_0 UInt64
|
||||
sum(number_0) UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
|
||||
Header: number_0 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -122,7 +122,7 @@ Header: any(number) UInt64
|
||||
Header: number_0 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_1 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
|
||||
Header: number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -137,7 +137,7 @@ Header: a UInt64
|
||||
Aggregating
|
||||
Header: number_1 UInt64
|
||||
sum(number_1) UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
|
||||
Header: number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
@ -171,7 +171,30 @@ Header: a UInt64
|
||||
Header: number_1 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_2 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
|
||||
Header: number_2 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function
|
||||
-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order
|
||||
Expression (Project names)
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Expression (Before ORDER BY)
|
||||
Expression (Projection)
|
||||
Expression (Change column names to column identifiers)
|
||||
Expression (Project names)
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Expression (Before ORDER BY)
|
||||
Expression (Projection)
|
||||
Expression (Change column names to column identifiers)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
-- non-stateful function does _not_ prevent removing inner ORDER BY
|
||||
Expression (Project names)
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Expression (Before ORDER BY)
|
||||
Expression (Projection)
|
||||
Expression (Change column names to column identifiers)
|
||||
Expression (Project names)
|
||||
Expression (Projection)
|
||||
Expression (Change column names to column identifiers)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
|
@ -209,3 +209,32 @@ FROM
|
||||
GROUP BY number
|
||||
)
|
||||
WHERE a > 0;
|
||||
|
||||
SELECT '-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function';
|
||||
SET query_plan_enable_optimizations = 0;
|
||||
|
||||
SELECT '-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order';
|
||||
EXPLAIN
|
||||
SELECT
|
||||
number,
|
||||
neighbor(number, 2)
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(10)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC;
|
||||
|
||||
SELECT '-- non-stateful function does _not_ prevent removing inner ORDER BY';
|
||||
EXPLAIN
|
||||
SELECT
|
||||
number,
|
||||
plus(number, 2)
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(10)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC;
|
||||
|
Loading…
Reference in New Issue
Block a user