From f011f721146fd2e6cef36fbf0dc46635a893379c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 14 Sep 2022 20:55:49 +0000 Subject: [PATCH] Try to apply optimization only if optimizer reaches pre distinct node + more comments --- src/Core/SortDescription.cpp | 2 +- src/Core/SortDescription.h | 2 +- .../QueryPlan/Optimizations/Optimizations.h | 2 +- .../Optimizations/distinctReadInOrder.cpp | 56 +++++++++---------- 4 files changed, 29 insertions(+), 33 deletions(-) diff --git a/src/Core/SortDescription.cpp b/src/Core/SortDescription.cpp index 66f5b76721b..59018fb13b4 100644 --- a/src/Core/SortDescription.cpp +++ b/src/Core/SortDescription.cpp @@ -42,7 +42,7 @@ void SortColumnDescription::explain(JSONBuilder::JSONMap & map) const map.add("With Fill", with_fill); } -size_t SortDescription::hasPrefix(const SortDescription & prefix) const +bool SortDescription::hasPrefix(const SortDescription & prefix) const { if (prefix.empty()) return true; diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 9eaf4ce1da3..a697323b593 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -121,7 +121,7 @@ public: size_t min_count_to_compile_sort_description = 3; bool compile_sort_description = false; - size_t hasPrefix(const SortDescription & prefix) const; + bool hasPrefix(const SortDescription & prefix) const; }; /** Compile sort description for header_types. diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 4f2e177aaf6..25825f2f5b9 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -54,7 +54,7 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: /// Update information about prefix sort description in SortingStep. size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); -/// TODO: description +/// Reading in order from MergeTree table if DISTINCT columns match or form a prefix of MergeTree sorting key size_t tryDistinctReadInOrder(QueryPlan::Node * node, QueryPlan::Nodes & nodes); inline const auto & getOptimizations() diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp index f48340b0417..1f493331498 100644 --- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp @@ -11,41 +11,34 @@ namespace DB::QueryPlanOptimizations { -/// size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &) { - /// walk through the plan - /// (1) check if there is preliminary distinct node - /// (2) check if nodes below preliminary distinct preserve sorting - QueryPlan::Node * node = parent_node; + /// check if it is preliminary distinct node DistinctStep * pre_distinct = nullptr; - QueryPlan::Node * pre_distinct_node = nullptr; - while (!node->children.empty()) + if (auto * distinct = typeid_cast(parent_node->step.get()); distinct) { - if (pre_distinct) - { - /// check if nodes below DISTINCT preserve sorting - const auto * step = dynamic_cast(node->step.get()); - if (step) - { - const ITransformingStep::DataStreamTraits & traits = step->getDataStreamTraits(); - if (!traits.preserves_sorting) - return 0; - } - } - if (auto * tmp = typeid_cast(node->step.get()); tmp) - { - if (tmp->isPreliminary()) - { - pre_distinct_node = node; - pre_distinct = tmp; - } - } - node = node->children.front(); + if (distinct->isPreliminary()) + pre_distinct = distinct; } if (!pre_distinct) return 0; + /// walk through the plan + /// check if nodes below preliminary distinct preserve sorting + QueryPlan::Node * node = parent_node; + while (!node->children.empty()) + { + const auto * step = dynamic_cast(node->step.get()); + if (step) + { + const ITransformingStep::DataStreamTraits & traits = step->getDataStreamTraits(); + if (!traits.preserves_sorting) + return 0; + } + node = node->children.front(); + } + + /// check if we read from MergeTree auto * read_from_merge_tree = typeid_cast(node->step.get()); if (!read_from_merge_tree) return 0; @@ -55,7 +48,7 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &) if (output.sort_scope != DataStream::SortScope::Chunk) return 0; - const SortDescription & sort_desc = output.sort_description; + /// find non-const columns in DISTINCT const auto & distinct_columns = pre_distinct->getOutputStream().header.getColumnsWithTypeAndName(); std::vector non_const_columns; non_const_columns.reserve(distinct_columns.size()); @@ -67,6 +60,7 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &) /// apply optimization only when distinct columns match or form prefix of sorting key /// todo: check if reading in order optimization would be beneficial when sorting key is prefix of columns in DISTINCT + const SortDescription & sort_desc = output.sort_description; if (sort_desc.size() < non_const_columns.size()) return 0; @@ -83,14 +77,15 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &) if (distinct_sort_desc.empty()) return 0; + /// update input order info in read_from_merge_tree step const int direction = 1; /// default direction, ASC InputOrderInfoPtr order_info = std::make_shared(distinct_sort_desc, distinct_sort_desc.size(), direction, pre_distinct->getLimitHint()); read_from_merge_tree->setQueryInfoInputOrderInfo(order_info); - /// update data stream's sorting properties + /// find all transforms between preliminary distinct step and ReadFromMergeTree std::vector steps2update; - node = pre_distinct_node; + node = parent_node; while (node && node->step.get() != read_from_merge_tree) { auto * transform = dynamic_cast(node->step.get()); @@ -103,6 +98,7 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &) node = nullptr; } + /// update data stream's sorting properties for found transforms const DataStream * input_stream = &read_from_merge_tree->getOutputStream(); while (!steps2update.empty()) {