mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 04:52:10 +00:00
Fix: correct update of data stream sorting properties after removing
sorting
This commit is contained in:
parent
1866f990de
commit
57d2fd300a
@ -90,6 +90,20 @@ private:
|
||||
|
||||
const Derived & getDerived() const { return *static_cast<Derived *>(this); }
|
||||
|
||||
std::unordered_map<const IQueryPlanStep*, std::string> address2name;
|
||||
std::unordered_map<std::string, UInt32> name_gen;
|
||||
|
||||
std::string getStepId(const IQueryPlanStep* step)
|
||||
{
|
||||
const auto step_name = step->getName();
|
||||
auto it = address2name.find(step);
|
||||
if (it != address2name.end())
|
||||
return it->second;
|
||||
|
||||
const auto seq_num = name_gen[step_name]++;
|
||||
return address2name.insert({step, fmt::format("{}{}", step_name, seq_num)}).first->second;
|
||||
}
|
||||
|
||||
protected:
|
||||
void logStep(const char * prefix, const QueryPlan::Node * node)
|
||||
{
|
||||
@ -100,13 +114,13 @@ protected:
|
||||
&Poco::Logger::get("QueryPlanVisitor"),
|
||||
"{}: {}: {}",
|
||||
prefix,
|
||||
current_step->getName(),
|
||||
getStepId(current_step),
|
||||
reinterpret_cast<const void *>(current_step));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
constexpr bool debug_logging_enabled = false;
|
||||
constexpr bool debug_logging_enabled = true;
|
||||
|
||||
class RemoveRedundantSorting : public QueryPlanVisitor<RemoveRedundantSorting, debug_logging_enabled>
|
||||
{
|
||||
@ -182,9 +196,13 @@ private:
|
||||
for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
|
||||
{
|
||||
const QueryPlan::Node * node = it->node;
|
||||
/// skip removed sorting steps
|
||||
auto * step = node->step.get();
|
||||
if (dynamic_cast<const SortingStep *>(step) && node != nodes_affect_order.back())
|
||||
continue;
|
||||
|
||||
logStep("update sorting traits", node);
|
||||
|
||||
auto * step = node->step.get();
|
||||
auto * trans = dynamic_cast<ITransformingStep *>(step);
|
||||
if (!trans)
|
||||
{
|
||||
@ -262,15 +280,16 @@ private:
|
||||
for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
|
||||
{
|
||||
const QueryPlan::Node * node = it->node;
|
||||
logStep("checking for stateful function", node);
|
||||
|
||||
/// walking though stack until reach node which affects order
|
||||
if (node == node_affect_order)
|
||||
break;
|
||||
|
||||
const auto * step = node->step.get();
|
||||
/// skip removed sorting steps
|
||||
if (dynamic_cast<const SortingStep*>(step))
|
||||
continue;
|
||||
|
||||
|
||||
logStep("checking for stateful function", node);
|
||||
if (const auto * expr = typeid_cast<const ExpressionStep *>(step); expr)
|
||||
{
|
||||
if (expr->getExpression()->hasStatefulFunctions())
|
||||
@ -296,13 +315,16 @@ private:
|
||||
for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
|
||||
{
|
||||
const QueryPlan::Node * node = it->node;
|
||||
logStep("checking path from current sorting", node);
|
||||
|
||||
/// walking though stack until reach node which affects order
|
||||
if (node == node_affect_order)
|
||||
break;
|
||||
|
||||
const auto * step = node->step.get();
|
||||
/// skip removed sorting steps
|
||||
if (dynamic_cast<const SortingStep *>(step))
|
||||
continue;
|
||||
|
||||
logStep("checking path from current sorting", node);
|
||||
|
||||
/// (2) for window function we do ORDER BY in 2 Sorting steps,
|
||||
/// so do not delete Sorting if window function step is on top
|
||||
|
@ -0,0 +1,208 @@
|
||||
-- Disabled query_plan_remove_redundant_sorting
|
||||
-- ORDER BY clauses in subqueries are untouched
|
||||
Expression (Projection)
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + Projection))
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + Projection))
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression (Before ORDER BY)
|
||||
Header: number UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- Enabled query_plan_remove_redundant_sorting
|
||||
-- ORDER BY removes ORDER BY clauses in subqueries
|
||||
-- query
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
-- explain
|
||||
Expression (Projection)
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))
|
||||
Header: number UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- execute
|
||||
0
|
||||
1
|
||||
2
|
||||
-- ORDER BY cannot remove ORDER BY in subquery WITH FILL
|
||||
-- query
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC WITH FILL STEP 1
|
||||
)
|
||||
ORDER BY number ASC
|
||||
-- explain
|
||||
Expression (Projection)
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + Projection))
|
||||
Header: number UInt64
|
||||
Filling
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
|
||||
Header: number UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- execute
|
||||
0
|
||||
1
|
||||
2
|
||||
-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY
|
||||
-- query
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
LIMIT 1 BY number
|
||||
)
|
||||
ORDER BY number ASC
|
||||
-- explain
|
||||
Expression (Projection)
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + Projection))
|
||||
Header: number UInt64
|
||||
LimitBy
|
||||
Header: number UInt64
|
||||
Expression (Before LIMIT BY)
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
|
||||
Header: number UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- execute
|
||||
0
|
||||
1
|
||||
2
|
||||
-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed
|
||||
-- query
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM numbers(3)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
) AS t1,
|
||||
(
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number DESC
|
||||
) AS t2
|
||||
-- explain
|
||||
Expression ((Projection + Before ORDER BY))
|
||||
Header: number UInt64
|
||||
t2.number UInt64
|
||||
Join (JOIN FillRightFirst)
|
||||
Header: number UInt64
|
||||
t2.number UInt64
|
||||
Expression ((Before JOIN + Projection))
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
|
||||
Header: number UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
Expression ((Joined actions + (Rename joined columns + Projection)))
|
||||
Header: t2.number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
|
||||
Header: number UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- execute
|
||||
0 2
|
||||
0 1
|
||||
0 0
|
||||
1 2
|
||||
1 1
|
||||
1 0
|
||||
2 2
|
||||
2 1
|
||||
2 0
|
||||
-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps
|
||||
-- query
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
GROUP BY number
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
-- explain
|
||||
Expression (Projection)
|
||||
Header: number UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))
|
||||
Header: number UInt64
|
||||
Aggregating
|
||||
Header: number UInt64
|
||||
Expression (Before GROUP BY)
|
||||
Header: number UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- execute
|
||||
0
|
||||
1
|
||||
2
|
263
tests/queries/0_stateless/02496_remove_redundant_sorting.sh
Executable file
263
tests/queries/0_stateless/02496_remove_redundant_sorting.sh
Executable file
@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
DISABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=0;SET optimize_duplicate_order_by_and_distinct=0"
|
||||
ENABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=1;SET optimize_duplicate_order_by_and_distinct=0"
|
||||
|
||||
echo "-- Disabled query_plan_remove_redundant_sorting"
|
||||
echo "-- ORDER BY clauses in subqueries are untouched"
|
||||
query="SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC"
|
||||
$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN header=1 $query"
|
||||
|
||||
function explain_and_execute {
|
||||
echo "-- query"
|
||||
echo "$1"
|
||||
echo "-- explain"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN header=1 $1"
|
||||
echo "-- execute"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1"
|
||||
}
|
||||
|
||||
echo "-- Enabled query_plan_remove_redundant_sorting"
|
||||
echo "-- ORDER BY removes ORDER BY clauses in subqueries"
|
||||
explain_and_execute "$query"
|
||||
|
||||
echo "-- ORDER BY cannot remove ORDER BY in subquery WITH FILL"
|
||||
query="SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC WITH FILL STEP 1
|
||||
)
|
||||
ORDER BY number ASC"
|
||||
explain_and_execute "$query"
|
||||
|
||||
echo "-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY"
|
||||
query="SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
LIMIT 1 BY number
|
||||
)
|
||||
ORDER BY number ASC"
|
||||
explain_and_execute "$query"
|
||||
|
||||
echo "-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed"
|
||||
query="SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM numbers(3)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
) AS t1,
|
||||
(
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number DESC
|
||||
) AS t2"
|
||||
explain_and_execute "$query"
|
||||
|
||||
# SELECT '-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries';
|
||||
# EXPLAIN header=1
|
||||
# SELECT *
|
||||
# FROM
|
||||
# (
|
||||
# SELECT number
|
||||
# FROM
|
||||
# (
|
||||
# SELECT number
|
||||
# FROM numbers(3)
|
||||
# ORDER BY number DESC
|
||||
# )
|
||||
# ORDER BY number ASC
|
||||
# ) AS t1,
|
||||
# (
|
||||
# SELECT number
|
||||
# FROM
|
||||
# (
|
||||
# SELECT number
|
||||
# FROM numbers(3)
|
||||
# ORDER BY number ASC
|
||||
# )
|
||||
# ORDER BY number DESC
|
||||
# ) AS t2
|
||||
# ORDER BY t1.number ASC;
|
||||
|
||||
# SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries';
|
||||
# EXPLAIN header=1
|
||||
# SELECT sum(number)
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(3)
|
||||
# ORDER BY number ASC
|
||||
# )
|
||||
# ORDER BY number DESC
|
||||
# )
|
||||
# GROUP BY number;
|
||||
|
||||
# SELECT '-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery';
|
||||
# EXPLAIN header=1
|
||||
# SELECT any(number)
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(3)
|
||||
# ORDER BY number ASC
|
||||
# )
|
||||
# ORDER BY number DESC
|
||||
# )
|
||||
# GROUP BY number;
|
||||
|
||||
# SELECT '-- query with aggregation function but w/o GROUP BY -> remove sorting';
|
||||
# EXPLAIN
|
||||
# SELECT sum(number)
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(10)
|
||||
# ORDER BY number DESC
|
||||
# );
|
||||
|
||||
# SELECT '-- check that optimization is applied recursively to subqueries as well';
|
||||
# SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here';
|
||||
# EXPLAIN header=1
|
||||
# SELECT a
|
||||
# FROM
|
||||
# (
|
||||
# SELECT sum(number) AS a
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(3)
|
||||
# ORDER BY number ASC
|
||||
# )
|
||||
# GROUP BY number
|
||||
# )
|
||||
# ORDER BY a ASC;
|
||||
|
||||
# SELECT '-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function';
|
||||
# EXPLAIN header=1
|
||||
# SELECT a
|
||||
# FROM
|
||||
# (
|
||||
# SELECT any(number) AS a
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(3)
|
||||
# ORDER BY number ASC
|
||||
# )
|
||||
# GROUP BY number
|
||||
# )
|
||||
# ORDER BY a ASC;
|
||||
|
||||
# SELECT '-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY';
|
||||
# EXPLAIN header=1
|
||||
# SELECT a
|
||||
# FROM
|
||||
# (
|
||||
# SELECT any(number) AS a
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(3)
|
||||
# ORDER BY number DESC
|
||||
# )
|
||||
# ORDER BY number ASC
|
||||
# )
|
||||
# GROUP BY number
|
||||
# )
|
||||
# WHERE a > 0;
|
||||
|
||||
# SELECT '-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function';
|
||||
# SET query_plan_enable_optimizations = 0;
|
||||
|
||||
# SELECT '-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order';
|
||||
# EXPLAIN
|
||||
# SELECT
|
||||
# number,
|
||||
# neighbor(number, 2)
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(10)
|
||||
# ORDER BY number DESC
|
||||
# )
|
||||
# ORDER BY number ASC;
|
||||
|
||||
# SELECT '-- non-stateful function does _not_ prevent removing inner ORDER BY';
|
||||
# EXPLAIN
|
||||
# SELECT
|
||||
# number,
|
||||
# plus(number, 2)
|
||||
# FROM
|
||||
# (
|
||||
# SELECT *
|
||||
# FROM numbers(10)
|
||||
# ORDER BY number DESC
|
||||
# )
|
||||
|
||||
echo "-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps"
|
||||
query="SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
GROUP BY number
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC"
|
||||
explain_and_execute "$query"
|
Loading…
Reference in New Issue
Block a user