Respect stateful functions in the optimization

This commit is contained in:
Igor Nikonov 2022-12-09 11:57:58 +00:00
parent 49694578ba
commit c2b10e8dba
4 changed files with 118 additions and 24 deletions

View File

@ -24,16 +24,51 @@ void printStepName(const char * prefix, const QueryPlan::Node * node)
LOG_DEBUG(&Poco::Logger::get("RedundantOrderBy"), "{}: {}: {}", prefix, stepName(node), reinterpret_cast<void *>(node->step.get()));
}
void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
struct FrameWithParent
{
struct Frame
{
QueryPlan::Node * node = nullptr;
QueryPlan::Node * parent_node = nullptr;
size_t next_child = 0;
};
};
std::vector<Frame> stack;
using StackWithParent = std::vector<FrameWithParent>;
bool checkForStatefulFunctions(std::vector<FrameWithParent> const & stack, const QueryPlan::Node * node_affect_order)
{
chassert(!stack.empty());
chassert(typeid_cast<const SortingStep *>(stack.back().node->step.get()));
/// skip element on top of stack since it's sorting
for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
{
const auto * node = it->node;
if (node == node_affect_order)
break;
const auto * step = node->step.get();
const auto * expr = typeid_cast<const ExpressionStep *>(step);
if (expr)
{
if (expr->getExpression()->hasStatefulFunctions())
return true;
}
else
{
const auto * trans = typeid_cast<const ITransformingStep *>(step);
if (!trans)
break;
if (!trans->getDataStreamTraits().preserves_sorting)
break;
}
}
return false;
}
void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
{
StackWithParent stack;
stack.push_back({.node = root});
std::vector<QueryPlan::Node *> nodes_affect_order;
@ -44,7 +79,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
QueryPlan::Node * current_node = frame.node;
QueryPlan::Node * parent_node = frame.parent_node;
IQueryPlanStep * current_step = frame.node->step.get();
IQueryPlanStep * current_step = current_node->step.get();
printStepName("back", current_node);
/// top-down visit
@ -66,7 +101,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
return false;
bool remove_sorting = false;
/// TODO: (0) check stateful functions
/// (1) aggregation
if (const AggregatingStep * parent_aggr = typeid_cast<AggregatingStep *>(step_affect_order); parent_aggr)
{
@ -88,6 +123,12 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
if (remove_sorting)
{
/// if there is expression with stateful function between current step
/// and step which affects order, then we need to keep sorting since
/// stateful function output can depend on order
if (checkForStatefulFunctions(stack, node_affect_order))
return false;
chassert(typeid_cast<ExpressionStep *>(current_node->children.front()->step.get()));
chassert(!current_node->children.front()->children.empty());
@ -104,7 +145,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
frame.next_child = frame.node->children.size();
/// current sorting step has been removed from plan, its parent has new children, need to visit them
auto next_frame = Frame{.node = parent_node->children[0], .parent_node = parent_node};
auto next_frame = FrameWithParent{.node = parent_node->children[0], .parent_node = parent_node};
++frame.next_child;
printStepName("push", next_frame.node);
stack.push_back(next_frame);
@ -126,7 +167,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
/// Traverse all children
if (frame.next_child < frame.node->children.size())
{
auto next_frame = Frame{.node = current_node->children[frame.next_child], .parent_node = current_node};
auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node};
++frame.next_child;
printStepName("push", next_frame.node);
stack.push_back(next_frame);

View File

@ -447,10 +447,11 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings)
{
QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
if (optimization_settings.remove_redundant_order_by)
QueryPlanOptimizations::tryRemoveRedundantOrderBy(root);
QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes);
}

View File

@ -22,7 +22,7 @@ Expression (Project names)
Header: number UInt64
Sorting (Sorting for ORDER BY)
Header: number_0 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_0 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -37,7 +37,7 @@ Header: number UInt64
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_1 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -54,7 +54,7 @@ Header: number UInt64
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_1 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -69,7 +69,7 @@ Header: t1.number UInt64
Header: t1.number_0 UInt64
Sorting (Sorting for ORDER BY)
Header: number_2 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_2 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -77,7 +77,7 @@ Header: t1.number UInt64
Header: t2.number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_4 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_4 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -94,11 +94,11 @@ Header: t1.number UInt64
Join (JOIN FillRightFirst)
Header: t1.number_0 UInt64
t2.number_1 UInt64
Expression ((Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))
Expression ((Change column names to column identifiers + Project names))
Header: t1.number_0 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
Expression ((Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))
Expression ((Change column names to column identifiers + Project names))
Header: t2.number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -108,7 +108,7 @@ Header: sum(number) UInt64
Aggregating
Header: number_0 UInt64
sum(number_0) UInt64
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
Header: number_0 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -122,7 +122,7 @@ Header: any(number) UInt64
Header: number_0 UInt64
Sorting (Sorting for ORDER BY)
Header: number_1 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -137,7 +137,7 @@ Header: a UInt64
Aggregating
Header: number_1 UInt64
sum(number_1) UInt64
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
@ -171,7 +171,30 @@ Header: a UInt64
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_2 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_2 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function
-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order
Expression (Project names)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Expression (Projection)
Expression (Change column names to column identifiers)
Expression (Project names)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Expression (Projection)
Expression (Change column names to column identifiers)
ReadFromStorage (SystemNumbers)
-- non-stateful function does _not_ prevent removing inner ORDER BY
Expression (Project names)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Expression (Projection)
Expression (Change column names to column identifiers)
Expression (Project names)
Expression (Projection)
Expression (Change column names to column identifiers)
ReadFromStorage (SystemNumbers)

View File

@ -209,3 +209,32 @@ FROM
GROUP BY number
)
WHERE a > 0;
SELECT '-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function';
SET query_plan_enable_optimizations = 0;
SELECT '-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order';
EXPLAIN
SELECT
number,
neighbor(number, 2)
FROM
(
SELECT *
FROM numbers(10)
ORDER BY number DESC
)
ORDER BY number ASC;
SELECT '-- non-stateful function does _not_ prevent removing inner ORDER BY';
EXPLAIN
SELECT
number,
plus(number, 2)
FROM
(
SELECT *
FROM numbers(10)
ORDER BY number DESC
)
ORDER BY number ASC;