Merge pull request #43905 from ClickHouse/igor/remove_redundant_order_by

Remove redundant sorting
This commit is contained in:
Igor Nikonov 2023-01-18 13:25:03 +01:00 committed by GitHub
commit 72066846cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 866 additions and 15 deletions

View File

@ -587,6 +587,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, query_plan_optimize_primary_key, true, "Analyze primary key using query plan (instead of AST)", 0) \
M(Bool, query_plan_read_in_order, true, "Use query plan for read-in-order optimisation", 0) \
M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimisation", 0) \
M(Bool, query_plan_remove_redundant_sorting, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries", 0) \
M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard 'extractAllGroupsHorizontal' against consuming too much memory with greedy RE.", 0) \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \

View File

@ -198,7 +198,7 @@ void CreateSetAndFilterOnTheFlyStep::updateOutputStream()
own_set->setHeader(getColumnSubset(input_streams[0].header, column_names));
output_stream = input_streams[0];
output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
}

View File

@ -58,6 +58,9 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
/// Reading in order from MergeTree table if DISTINCT columns match or form a prefix of MergeTree sorting key
size_t tryDistinctReadInOrder(QueryPlan::Node * node);
/// Remove redundant sorting steps from the query plan, for example, sorting steps related to ORDER BY clauses in subqueries
void tryRemoveRedundantSorting(QueryPlan::Node * root);
/// Put some steps under union, so that plan optimisation could be applied to union parts separately.
/// For example, the plan can be rewritten like:
/// - Something - - Expression - Something -

View File

@ -14,6 +14,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const
settings.distinct_in_order = from.optimize_distinct_in_order;
settings.read_in_order = from.optimize_read_in_order && from.query_plan_read_in_order;
settings.aggregation_in_order = from.optimize_aggregation_in_order && from.query_plan_aggregation_in_order;
settings.remove_redundant_sorting = from.query_plan_remove_redundant_sorting;
return settings;
}

View File

@ -30,6 +30,9 @@ struct QueryPlanOptimizationSettings
/// If aggregation-in-order optimisation is enabled
bool aggregation_in_order = false;
/// If removing redundant sorting is enabled, for example, ORDER BY clauses in subqueries
bool remove_redundant_sorting = true;
static QueryPlanOptimizationSettings fromSettings(const Settings & from);
static QueryPlanOptimizationSettings fromContext(ContextPtr from);
};

View File

@ -0,0 +1,328 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Interpreters/FullSortingMergeJoin.h>
#include <Processors/QueryPlan/AggregatingStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/FillingStep.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Processors/QueryPlan/JoinStep.h>
#include <Processors/QueryPlan/LimitByStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Processors/QueryPlan/ReadFromRemote.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/WindowStep.h>
#include <Common/logger_useful.h>
#include <Common/typeid_cast.h>
namespace DB::QueryPlanOptimizations
{
/// Non-recursive depth-first traversal of a query plan, implemented with CRTP.
///
/// `Derived` must provide:
///   bool visitTopDownImpl(QueryPlan::Node * current_node, QueryPlan::Node * parent_node);
///   void visitBottomUpImpl(QueryPlan::Node * current_node, QueryPlan::Node * parent_node);
///
/// For every frame on the stack the top-down hook is invoked while no child of the node
/// has been scheduled yet (`next_child == 0`), and the bottom-up hook is invoked once all
/// children have been traversed, right before the frame is popped.
///
/// Contract for the top-down hook: when it returns false, the traversal loop restarts from
/// the (possibly changed) top of the stack, so the hook is expected to have modified the
/// stack or the current frame; otherwise the same frame would be processed again.
/// The hook must not push frames when it returns true, because the caller keeps a reference
/// to the current frame in that case.
///
/// `debug_logging` enables tracing of every traversal action via LOG_DEBUG.
template <typename Derived, bool debug_logging = false>
class QueryPlanVisitor
{
protected:
    /// Single entry of the explicit traversal stack
    struct FrameWithParent
    {
        QueryPlan::Node * node = nullptr;
        QueryPlan::Node * parent_node = nullptr;
        /// index of the next child of `node` to descend into
        size_t next_child = 0;
    };

    using StackWithParent = std::vector<FrameWithParent>;

    QueryPlan::Node * root = nullptr;
    StackWithParent stack;

public:
    explicit QueryPlanVisitor(QueryPlan::Node * root_) : root(root_) { }

    /// Traverse the plan starting from `root`. Does nothing if there is no root node.
    void visit()
    {
        if (!root)
            return;

        stack.push_back({.node = root});

        while (!stack.empty())
        {
            auto & frame = stack.back();

            QueryPlan::Node * current_node = frame.node;
            QueryPlan::Node * parent_node = frame.parent_node;

            logStep("back", current_node);

            /// top-down visit
            if (0 == frame.next_child)
            {
                logStep("top-down", current_node);
                /// the hook may have changed the stack, `frame` must not be used after a `false` result
                if (!visitTopDown(current_node, parent_node))
                    continue;
            }

            /// Traverse all children
            if (frame.next_child < frame.node->children.size())
            {
                auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node};
                ++frame.next_child;
                logStep("push", next_frame.node);
                stack.push_back(next_frame);
                continue;
            }

            /// bottom-up visit
            logStep("bottom-up", current_node);
            visitBottomUp(current_node, parent_node);

            logStep("pop", current_node);
            stack.pop_back();
        }
    }

    /// Forwards to Derived::visitTopDownImpl()
    bool visitTopDown(QueryPlan::Node * current_node, QueryPlan::Node * parent_node)
    {
        return getDerived().visitTopDownImpl(current_node, parent_node);
    }

    /// Forwards to Derived::visitBottomUpImpl()
    void visitBottomUp(QueryPlan::Node * current_node, QueryPlan::Node * parent_node)
    {
        getDerived().visitBottomUpImpl(current_node, parent_node);
    }

private:
    Derived & getDerived() { return *static_cast<Derived *>(this); }

    /// `this` is pointer to const here, so the downcast has to keep the const qualifier
    const Derived & getDerived() const { return *static_cast<const Derived *>(this); }

protected:
    /// Trace a traversal action for `node`; compiled out unless `debug_logging` is set
    void logStep(const char * prefix, const QueryPlan::Node * node)
    {
        if constexpr (debug_logging)
        {
            const IQueryPlanStep * current_step = node->step.get();
            LOG_DEBUG(
                &Poco::Logger::get("QueryPlanVisitor"),
                "{}: {}: {}",
                prefix,
                current_step->getName(),
                reinterpret_cast<const void *>(current_step));
        }
    }
};
/// Compile-time switch for tracing the traversal done by RemoveRedundantSorting
constexpr bool debug_logging_enabled = false;

/// Removes SortingStep nodes whose result order is not observable, because a step above
/// either re-sorts the data (full sorting) or does not depend on the order of its input
/// (aggregation without order-dependent functions).
/// Only the sorting step itself is removed from the plan, the steps below it are kept.
class RemoveRedundantSorting : public QueryPlanVisitor<RemoveRedundantSorting, debug_logging_enabled>
{
    /// stack with nodes which affect order
    /// nodes added when traversing top-down
    /// as soon as all children for the node on top of stack are traversed, the node is removed from stack
    std::vector<QueryPlan::Node *> nodes_affect_order;

public:
    explicit RemoveRedundantSorting(QueryPlan::Node * root_) : QueryPlanVisitor<RemoveRedundantSorting, debug_logging_enabled>(root_) { }

    /// Top-down hook.
    /// Returns false if the current sorting node has been removed from the plan; in this case
    /// the frame for the node's child has already been pushed on the traversal stack.
    bool visitTopDownImpl(QueryPlan::Node * current_node, QueryPlan::Node * parent_node)
    {
        IQueryPlanStep * current_step = current_node->step.get();

        /// if there is parent node which can affect order and current step is sorting
        /// then check if we can remove the sorting step (and corresponding expression step)
        if (!nodes_affect_order.empty() && typeid_cast<SortingStep *>(current_step))
        {
            if (tryRemoveSorting(current_node, parent_node))
            {
                logStep("step affect sorting", nodes_affect_order.back());
                logStep("removed from plan", current_node);

                auto & frame = stack.back();
                /// mark removed node as visited
                frame.next_child = frame.node->children.size();

                /// current sorting step has been removed from plan, its child is attached to the parent now
                /// and still needs to be visited. Take the child from the removed node itself,
                /// since the sorting node is not necessarily the first child of its parent
                auto next_frame = FrameWithParent{.node = current_node->children.front(), .parent_node = parent_node};
                stack.push_back(next_frame);
                logStep("push", next_frame.node);
                return false;
            }
        }

        if (typeid_cast<LimitStep *>(current_step)
            || typeid_cast<LimitByStep *>(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable
            || typeid_cast<FillingStep *>(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable
            || typeid_cast<SortingStep *>(current_step) /// (3) ORDER BY will change order of previous sorting
            || typeid_cast<AggregatingStep *>(current_step)) /// (4) aggregation change order
        {
            logStep("nodes_affect_order/push", current_node);
            nodes_affect_order.push_back(current_node);
        }

        return true;
    }

    /// Bottom-up hook.
    void visitBottomUpImpl(QueryPlan::Node * current_node, QueryPlan::Node *)
    {
        /// we come here when all children of current_node are visited,
        /// so, if it's a node which affect order, remove it from the corresponding stack
        if (!nodes_affect_order.empty() && nodes_affect_order.back() == current_node)
        {
            logStep("nodes_affect_order/pop", current_node);
            nodes_affect_order.pop_back();
        }
    }

private:
    /// Detach `sorting_node` from `parent_node` if the sorting is redundant.
    /// Expects `sorting_node` to be on top of the traversal stack and `nodes_affect_order` to be non-empty.
    /// Returns true if the node has been removed from the plan.
    bool tryRemoveSorting(QueryPlan::Node * sorting_node, QueryPlan::Node * parent_node)
    {
        if (!canRemoveCurrentSorting())
            return false;

        /// remove sorting: put the child of the sorting node into the slot of the parent
        /// which currently refers to the sorting node (it is not necessarily the first one)
        QueryPlan::Node * replacement = sorting_node->children.front();
        bool replaced = false;
        for (auto & child : parent_node->children)
        {
            if (child == sorting_node)
            {
                child = replacement;
                replaced = true;
                break;
            }
        }

        /// not expected to happen: sorting node is not referenced by its parent, leave the plan untouched
        if (!replaced)
            return false;

        /// sorting removed, so need to update sorting traits for upstream steps
        const DataStream * input_stream = &replacement->step->getOutputStream();
        chassert(parent_node == (stack.rbegin() + 1)->node); /// skip element on top of stack since it's sorting which was just removed
        for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
        {
            const QueryPlan::Node * node = it->node;
            logStep("update sorting traits", node);

            auto * step = node->step.get();
            auto * trans = dynamic_cast<ITransformingStep *>(step);
            if (!trans)
            {
                logStep("stop update sorting traits: node is not transforming step", node);
                break;
            }

            trans->updateInputStream(*input_stream);
            input_stream = &trans->getOutputStream();

            /// update sorting properties through stack until reach node which affects order (inclusive)
            if (node == nodes_affect_order.back())
            {
                logStep("stop update sorting traits: reached node which affect order", node);
                break;
            }
        }

        return true;
    }

    /// Sorting on top of the traversal stack can be removed if both the closest node which affects order
    /// and all the steps between that node and the sorting allow it
    bool canRemoveCurrentSorting()
    {
        chassert(!stack.empty());
        chassert(typeid_cast<const SortingStep *>(stack.back().node->step.get()));

        return checkNodeAffectingOrder(nodes_affect_order.back()) && checkPathFromCurrentSortingNode(nodes_affect_order.back());
    }

    /// Returns true if the step of `node_affect_order` makes order of its input irrelevant
    static bool checkNodeAffectingOrder(QueryPlan::Node * node_affect_order)
    {
        IQueryPlanStep * step_affect_order = node_affect_order->step.get();

        /// if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable
        /// if ORDER BY is with FILL WITH, it is non-removable
        if (typeid_cast<LimitStep *>(step_affect_order) || typeid_cast<LimitByStep *>(step_affect_order)
            || typeid_cast<FillingStep *>(step_affect_order))
            return false;

        /// (1) aggregation
        if (const AggregatingStep * parent_aggr = typeid_cast<AggregatingStep *>(step_affect_order); parent_aggr)
        {
            /// aggregation in order relies on sorted input
            if (parent_aggr->inOrder())
                return false;

            /// result of order-dependent aggregate functions can change if sorting is removed
            auto const & aggregates = parent_aggr->getParams().aggregates;
            for (const auto & aggregate : aggregates)
            {
                auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(aggregate.function->getName());
                if (aggregate_function_properties && aggregate_function_properties->is_order_dependent)
                    return false;
            }
            return true;
        }
        /// (2) sorting
        else if (const auto * next_sorting = typeid_cast<const SortingStep *>(step_affect_order); next_sorting)
        {
            if (next_sorting->getType() == SortingStep::Type::Full)
                return true;
        }

        return false;
    }

    /// Returns false if some step between the sorting on top of the traversal stack
    /// and `node_affect_order` requires the sorting to be kept
    bool checkPathFromCurrentSortingNode(const QueryPlan::Node * node_affect_order)
    {
        chassert(!stack.empty());
        chassert(typeid_cast<const SortingStep *>(stack.back().node->step.get()));

        /// (1) if there is expression with stateful function between current step
        /// and step which affects order, then we need to keep sorting since
        /// stateful function output can depend on order

        /// skip element on top of stack since it's sorting
        for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
        {
            const QueryPlan::Node * node = it->node;
            logStep("checking for stateful function", node);

            /// walking through stack until reach node which affects order
            if (node == node_affect_order)
                break;

            const auto * step = node->step.get();

            if (const auto * expr = typeid_cast<const ExpressionStep *>(step); expr)
            {
                if (expr->getExpression()->hasStatefulFunctions())
                    return false;
            }
            else if (const auto * filter = typeid_cast<const FilterStep *>(step); filter)
            {
                if (filter->getExpression()->hasStatefulFunctions())
                    return false;
            }
            else
            {
                /// stop the check at the first step which is not transforming or does not preserve sorting
                const auto * trans = dynamic_cast<const ITransformingStep *>(step);
                if (!trans)
                    break;

                if (!trans->getDataStreamTraits().preserves_sorting)
                    break;
            }
        }

        /// check steps on stack if there are some which can prevent from removing SortingStep
        for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
        {
            const QueryPlan::Node * node = it->node;
            logStep("checking path from current sorting", node);

            /// walking through stack until reach node which affects order
            if (node == node_affect_order)
                break;

            const auto * step = node->step.get();

            /// (2) for window function we do ORDER BY in 2 Sorting steps,
            /// so do not delete Sorting if window function step is on top
            if (typeid_cast<const WindowStep *>(step))
                return false;

            /// (3) full sorting merge join requires sorted input
            if (const auto * join_step = typeid_cast<const JoinStep *>(step); join_step)
            {
                if (typeid_cast<const FullSortingMergeJoin *>(join_step->getJoin().get()))
                    return false;
            }
        }

        return true;
    }
};
/// Entry point of the optimization:
/// traverses the plan starting from `root` and removes sorting steps which are found to be redundant
void tryRemoveRedundantSorting(QueryPlan::Node * root)
{
    RemoveRedundantSorting visitor(root);
    visitor.visit();
}
}

View File

@ -447,6 +447,12 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings)
{
    /// Removal of redundant sorting has to run before the "mergeExpressions" optimization:
    /// it drops sorting steps only and keeps the expressions below them,
    /// so "mergeExpressions" gets a chance to handle these expressions afterwards
    const bool remove_redundant_sorting = optimization_settings.remove_redundant_sorting;
    if (remove_redundant_sorting)
    {
        QueryPlanOptimizations::tryRemoveRedundantSorting(root);
    }

    /// regular optimization passes
    QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
    QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes);
}

View File

@ -282,7 +282,16 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
{
/// skip sorting if stream is already sorted
if (input_sort_mode == DataStream::SortScope::Global && input_sort_desc.hasPrefix(result_description))
{
if (pipeline.getNumStreams() != 1)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"If input stream is globally sorted then there should be only 1 input stream at this stage. Number of input streams: "
"{}",
pipeline.getNumStreams());
return;
}
/// merge sorted
if (input_sort_mode == DataStream::SortScope::Stream && input_sort_desc.hasPrefix(result_description))

View File

@ -1,4 +1,8 @@
<test>
<settings>
<query_plan_remove_redundant_sorting>0</query_plan_remove_redundant_sorting>
</settings>
<create_query>CREATE TABLE rand_unlimited_10m_8 (key UInt8) Engine = Memory</create_query>
<create_query>CREATE TABLE rand_1k_10m_16 (key UInt16) Engine = Memory</create_query>

View File

@ -4,4 +4,5 @@ SELECT * FROM system.numbers ORDER BY number; -- { serverError 396 }
SET sort_overflow_mode = 'break';
SET max_block_size = 1000;
set query_plan_remove_redundant_sorting=0; -- to keep sorting in the query below
SELECT count() >= 100 AND count() <= 1000 FROM (SELECT * FROM system.numbers ORDER BY number);

View File

@ -1,3 +1,11 @@
set query_plan_remove_redundant_sorting=0; -- disable it for now since test with Float64 is failing with it
-- while debugging I observe incorrect behavior which can affect the current test result
-- but it's still unclear why the test is not failing w/o the optimization
-- SELECT CAST('9007199254740992', 'Float64') + CAST('1', 'Float64')
-- ┌─plus(CAST('9007199254740992', 'Float64'), CAST('1', 'Float64'))─┐
-- │ 9007199254740992 │
-- └─────────────────────────────────────────────────────────────────┘
-- Integer types are added as integers
SELECT toTypeName(sumCount(v)), sumCount(v) FROM
(

View File

@ -1,5 +1,5 @@
-- EXPLAIN PLAN sorting for MergeTree w/o sorting key
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a
Sorting (Global): a ASC
Sorting (Sorting for ORDER BY)
Sorting (Global): a ASC
@ -21,49 +21,49 @@ MergeSortingTransform × 3
LimitsCheckingTransform × 3
PartialSortingTransform × 3
-- ExpressionStep preserves sort mode
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a
Sorting (Global): a ASC
Sorting (Sorting for ORDER BY)
Sorting (Global): a ASC
Sorting (Chunk): a ASC
Sorting (Stream): a ASC
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1
Sorting (None)
Sorting (Sorting for ORDER BY)
Sorting (Global): plus(a, 1) ASC
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
-- ExpressionStep breaks sort mode
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1
Sorting (Global): plus(a, 1) ASC
Sorting (Sorting for ORDER BY)
Sorting (Global): plus(a, 1) ASC
Sorting (None)
Sorting (Chunk): a ASC
-- FilterStep preserves sort mode
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
-- FilterStep breaks sort mode
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0
Sorting (None)
Sorting (None)
Sorting (Chunk): a ASC
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0
Sorting (None)
Sorting (None)
Sorting (Chunk): a ASC
-- aliases break sorting order
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a
Sorting (Global): a ASC
Sorting (Sorting for ORDER BY)
Sorting (Global): a ASC
@ -73,14 +73,14 @@ Sorting (Global): a ASC
Sorting (Chunk): a ASC
Sorting (Stream): a ASC
-- aliases DONT break sorting order
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y)
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y)
Sorting (Global): x ASC, y ASC
Sorting (Sorting for ORDER BY)
Sorting (Global): x ASC, y ASC
Sorting (Chunk): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
-- actions chain breaks sorting order: input(column a)->sipHash64(column a)->alias(sipHash64(column a), a)->plus(alias a, 1)
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1
Sorting (None)
Sorting (Sorting for ORDER BY)
Sorting (Global): plus(a, 1) ASC
@ -90,7 +90,7 @@ Sorting (Global): plus(a, 1) ASC
Sorting (Chunk): a ASC
Sorting (Chunk): a ASC
-- check that correct sorting info is provided in case of only prefix of sorting key is in ORDER BY clause but all sorting key columns returned by query
-- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a
-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a
Sorting (Global): a ASC
Sorting (Sorting for ORDER BY)
Sorting (Global): a ASC

View File

@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
DISABLE_OPTIMIZATION="set optimize_sorting_by_input_stream_properties=0;set query_plan_read_in_order=0;set max_threads=3"
ENABLE_OPTIMIZATION="set optimize_sorting_by_input_stream_properties=1;set query_plan_read_in_order=1;set optimize_read_in_order=1;set max_threads=3"
MAKE_OUTPUT_STABLE="set optimize_read_in_order=1;set max_threads=3"
MAKE_OUTPUT_STABLE="set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0"
GREP_SORTING="grep 'PartialSortingTransform\|LimitsCheckingTransform\|MergeSortingTransform\|MergingSortedTransform'"
GREP_SORTMODE="grep 'Sorting ('"
TRIM_LEADING_SPACES="sed -e 's/^[ \t]*//'"

View File

@ -1,5 +1,13 @@
-- Tags: no-parallel
drop table if exists pr_t;
drop table if exists dist_pr_t;
drop table if exists dist_t_different_dbs;
drop table if exists shard_1.t_different_dbs;
drop table if exists t_different_dbs;
drop table if exists dist_t;
drop table if exists t;
create table t(a UInt64, b UInt64) engine=MergeTree order by a;
system stop merges t;
insert into t select number, number from numbers_mt(1e6);
@ -64,6 +72,7 @@ select a, count() from dist_pr_t group by a, b order by a limit 5 offset 500;
-- { echoOff } --
drop table pr_t;
drop table dist_pr_t;
drop table dist_t_different_dbs;
drop table shard_1.t_different_dbs;

View File

@ -31,6 +31,7 @@ select * from (explain pipeline select sum(x) from t settings max_threads=4, max
Resize 32 → 16
MergeTreeThread × 32 0 → 1
-- For read-in-order, disable everything
set query_plan_remove_redundant_sorting=0; -- to keep reading in order
select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1;
49999995000000
select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%';

View File

@ -1,3 +1,4 @@
drop table if exists t;
create table t (x UInt64) engine = MergeTree order by x;
insert into t select number from numbers_mt(10000000) settings max_insert_threads=8;
@ -20,7 +21,11 @@ select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=
select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeThread%';
-- For read-in-order, disable everything
set query_plan_remove_redundant_sorting=0; -- to keep reading in order
select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1;
select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%';
select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8, optimize_read_in_order=1, query_plan_read_in_order=1;
select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%';
-- { echoOff }
drop table t;

View File

@ -0,0 +1,209 @@
-- Disable query_plan_remove_redundant_sorting
-- ORDER BY clauses in subqueries are untouched
Expression (Project names)
Header: number UInt64
Sorting (Sorting for ORDER BY)
Header: number_0 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_0 UInt64
Sorting (Sorting for ORDER BY)
Header: number_1 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_2 UInt64
Expression ((Before ORDER BY + (Projection + Change column names to column identifiers)))
Header: number_2 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- Enable query_plan_remove_redundant_sorting
-- ORDER BY removes ORDER BY clauses in subqueries
Expression (Project names)
Header: number UInt64
Sorting (Sorting for ORDER BY)
Header: number_0 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))))
Header: number_0 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
0
1
2
-- ORDER BY cannot remove ORDER BY in subquery WITH FILL
Expression (Project names)
Header: number UInt64
Sorting (Sorting for ORDER BY)
Header: number_0 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_0 UInt64
Filling
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_1 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY
Expression (Project names)
Header: number UInt64
Sorting (Sorting for ORDER BY)
Header: number_0 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names))))
Header: number_0 UInt64
LimitBy
Header: number_1 UInt64
Expression (Before LIMIT BY)
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_1 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed
Expression ((Project names + (Projection + DROP unused columns after JOIN)))
Header: t1.number UInt64
t2.number UInt64
Join (JOIN FillRightFirst)
Header: t1.number_0 UInt64
t2.number_1 UInt64
Expression ((Change column names to column identifiers + Project names))
Header: t1.number_0 UInt64
Sorting (Sorting for ORDER BY)
Header: number_2 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))
Header: number_2 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
Expression ((Change column names to column identifiers + Project names))
Header: t2.number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_4 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))
Header: number_4 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries
Expression (Project names)
Header: t1.number UInt64
t2.number UInt64
Sorting (Sorting for ORDER BY)
Header: t1.number_0 UInt64
t2.number_1 UInt64
Expression ((Before ORDER BY + (Projection + DROP unused columns after JOIN)))
Header: t1.number_0 UInt64
t2.number_1 UInt64
Join (JOIN FillRightFirst)
Header: t1.number_0 UInt64
t2.number_1 UInt64
Expression ((Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))
Header: t1.number_0 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
Expression ((Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))
Header: t2.number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries
Expression ((Project names + Projection))
Header: sum(number) UInt64
Aggregating
Header: number_0 UInt64
sum(number_0) UInt64
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))))
Header: number_0 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery
Expression ((Project names + Projection))
Header: any(number) UInt64
Aggregating
Header: number_0 UInt64
any(number_0) UInt64
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
Header: number_0 UInt64
Sorting (Sorting for ORDER BY)
Header: number_1 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- query with aggregation function but w/o GROUP BY -> remove sorting
Expression ((Project names + Projection))
Aggregating
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))
ReadFromStorage (SystemNumbers)
-- check that optimization is applied recursively to subqueries as well
-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here
Expression (Project names)
Header: a UInt64
Sorting (Sorting for ORDER BY)
Header: a_0 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + Projection)))))
Header: a_0 UInt64
Aggregating
Header: number_1 UInt64
sum(number_1) UInt64
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))
Header: number_1 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function
Expression (Project names)
Header: a UInt64
Sorting (Sorting for ORDER BY)
Header: a_0 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + Projection)))))
Header: a_0 UInt64
Aggregating
Header: number_1 UInt64
any(number_1) UInt64
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_2 UInt64
Expression ((Before ORDER BY + (Projection + Change column names to column identifiers)))
Header: number_2 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY
Expression ((Project names + Projection))
Header: a UInt64
Filter ((WHERE + (Change column names to column identifiers + (Project names + Projection))))
Header: a_0 UInt64
Aggregating
Header: number_1 UInt64
any(number_1) UInt64
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
Header: number_1 UInt64
Sorting (Sorting for ORDER BY)
Header: number_2 UInt64
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))
Header: number_2 UInt64
ReadFromStorage (SystemNumbers)
Header: number UInt64
-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function
-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order
Expression (Project names)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Expression (Projection)
Expression (Change column names to column identifiers)
Expression (Project names)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Expression (Projection)
Expression (Change column names to column identifiers)
ReadFromStorage (SystemNumbers)
-- non-stateful function does _not_ prevent removing inner ORDER BY
Expression (Project names)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Expression (Projection)
Expression (Change column names to column identifiers)
Expression (Project names)
Expression (Before ORDER BY)
Expression (Projection)
Expression (Change column names to column identifiers)
ReadFromStorage (SystemNumbers)

View File

@ -0,0 +1,263 @@
-- Test setup: every query below runs with the analyzer enabled.
-- optimize_duplicate_order_by_and_distinct is switched off, presumably so that a
-- different ORDER BY de-duplication pass does not strip the subquery ORDER BYs
-- before the query-plan optimization under test can act on them (TODO confirm).
SET allow_experimental_analyzer=1;
SET optimize_duplicate_order_by_and_distinct=0;
-- Baseline: with query_plan_remove_redundant_sorting turned off, print the plan
-- for three nested ORDER BYs. Per the label, no subquery sort is expected to be dropped.
SELECT '-- Disable query_plan_remove_redundant_sorting';
SET query_plan_remove_redundant_sorting=0;
SELECT '-- ORDER BY clauses in subqueries are untouched';
EXPLAIN header=1
SELECT *
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number ASC
)
ORDER BY number DESC
)
ORDER BY number ASC;
-- Turn the optimization on and explain the same three-level ORDER BY query as
-- the baseline. Per the label, the outermost ORDER BY is expected to make the
-- ORDER BYs of both nested subqueries redundant.
SELECT '-- Enable query_plan_remove_redundant_sorting';
SET query_plan_remove_redundant_sorting=1;
SELECT '-- ORDER BY removes ORDER BY clauses in subqueries';
EXPLAIN header=1
SELECT *
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number ASC
)
ORDER BY number DESC
)
ORDER BY number ASC;
-- Same query executed for real (no EXPLAIN), so the returned rows become part
-- of the expected output in addition to the plan.
SELECT *
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number ASC
)
ORDER BY number DESC
)
ORDER BY number ASC;
-- The middle subquery sorts WITH FILL STEP 1. Per the label, the outer ORDER BY
-- must not cause that sort to be removed (presumably because WITH FILL relies on
-- its own sorted input -- TODO confirm). See the .reference file for the exact plan.
SELECT '-- ORDER BY cannot remove ORDER BY in subquery WITH FILL';
EXPLAIN header=1
SELECT *
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number DESC
)
ORDER BY number ASC WITH FILL STEP 1
)
ORDER BY number ASC;
-- The middle subquery combines ORDER BY with LIMIT 1 BY number. Per the label,
-- the outer ORDER BY must not cause that sort to be removed (presumably because
-- which row LIMIT BY keeps depends on the input order -- TODO confirm).
SELECT '-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY';
EXPLAIN header=1
SELECT *
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number DESC
)
ORDER BY number ASC
LIMIT 1 BY number
)
ORDER BY number ASC;
-- Two subqueries (t1, t2) joined with the comma syntax, each containing its own
-- pair of nested ORDER BYs. The main query has neither ORDER BY nor GROUP BY, so
-- per the label only the innermost ORDER BY on each side is expected to be removed.
SELECT '-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed';
EXPLAIN header=1
SELECT *
FROM
(
SELECT number
FROM
(
SELECT number
FROM numbers(3)
ORDER BY number DESC
)
ORDER BY number ASC
) AS t1,
(
SELECT number
FROM
(
SELECT number
FROM numbers(3)
ORDER BY number ASC
)
ORDER BY number DESC
) AS t2;
-- Same join as the previous case, but the main query adds ORDER BY t1.number.
-- The expected plan keeps a single Sorting step above the Join; neither join
-- input retains a Sorting step.
SELECT '-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries';
EXPLAIN header=1
SELECT *
FROM
(
SELECT number
FROM
(
SELECT number
FROM numbers(3)
ORDER BY number DESC
)
ORDER BY number ASC
) AS t1,
(
SELECT number
FROM
(
SELECT number
FROM numbers(3)
ORDER BY number ASC
)
ORDER BY number DESC
) AS t2
ORDER BY t1.number ASC;
-- sum() is labelled as not depending on input order, so both nested ORDER BYs
-- are redundant under the GROUP BY. The expected plan contains no Sorting step at all.
SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries';
EXPLAIN header=1
SELECT sum(number)
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number ASC
)
ORDER BY number DESC
)
GROUP BY number;
-- any() is labelled as depending on input order, so a sort feeding the
-- aggregation has to stay. The expected plan has exactly one Sorting step below
-- Aggregating; it sits above the merged expressions of the innermost subquery,
-- i.e. it appears to belong to the subquery nearest the aggregation, while the
-- innermost ORDER BY is gone.
SELECT '-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery';
EXPLAIN header=1
SELECT any(number)
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number ASC
)
ORDER BY number DESC
)
GROUP BY number;
-- Aggregation without GROUP BY over a sorted subquery. Uses plain EXPLAIN (no
-- header=1), so the expected plan lists steps only; it contains no Sorting step.
SELECT '-- query with aggregation function but w/o GROUP BY -> remove sorting';
EXPLAIN
SELECT sum(number)
FROM
(
SELECT *
FROM numbers(10)
ORDER BY number DESC
);
-- The aggregation now lives inside a subquery and the main query sorts its result.
-- The expected plan keeps the Sorting step for the outer ORDER BY a, and has no
-- Sorting step below Aggregating (the innermost ORDER BY is removed).
SELECT '-- check that optimization is applied recursively to subqueries as well';
SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here';
EXPLAIN header=1
SELECT a
FROM
(
SELECT sum(number) AS a
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number ASC
)
GROUP BY number
)
ORDER BY a ASC;
-- Counterpart of the previous case with any() instead of sum(). The expected plan
-- has two Sorting steps: one for the outer ORDER BY a and one below Aggregating,
-- kept for the innermost ORDER BY because of the order-dependent aggregate.
SELECT '-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function';
EXPLAIN header=1
SELECT a
FROM
(
SELECT any(number) AS a
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number ASC
)
GROUP BY number
)
ORDER BY a ASC;
-- The main query only filters (WHERE a > 0); all sorting happens inside the
-- subqueries. The expected plan is Filter -> Aggregating -> a single Sorting step:
-- one ORDER BY below the any() aggregation is kept and the innermost one is removed.
SELECT '-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY';
EXPLAIN header=1
SELECT a
FROM
(
SELECT any(number) AS a
FROM
(
SELECT *
FROM
(
SELECT *
FROM numbers(3)
ORDER BY number DESC
)
ORDER BY number ASC
)
GROUP BY number
)
WHERE a > 0;
-- query_plan_enable_optimizations is set to 0 here and not restored before the
-- next test. As the label says, this keeps functions from being lifted up
-- (liftUpFunctions). The expected plan therefore shows the individual,
-- unmerged Expression steps.
-- neighbor() is evaluated over the sorted subquery, so the expected plan keeps
-- both Sorting steps (the outer ORDER BY and the subquery's ORDER BY).
SELECT '-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function';
SET query_plan_enable_optimizations = 0;
SELECT '-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order';
EXPLAIN
SELECT
number,
neighbor(number, 2)
FROM
(
SELECT *
FROM numbers(10)
ORDER BY number DESC
)
ORDER BY number ASC;
-- Control case for the neighbor() test: same query shape with plus(), still run
-- with query_plan_enable_optimizations = 0 from the preceding test. The expected
-- plan keeps only the outer Sorting step; the subquery's Sorting step is removed.
SELECT '-- non-stateful function does _not_ prevent removing inner ORDER BY';
EXPLAIN
SELECT
number,
plus(number, 2)
FROM
(
SELECT *
FROM numbers(10)
ORDER BY number DESC
)
ORDER BY number ASC;