Merge pull request #58554 from ClickHouse/try-to-always-push-down-prewhere-from-query-plan

Simplify optimize-push-to-prewhere from query plan
Nikolai Kochetov 2024-02-12 12:52:39 +01:00 committed by GitHub
commit 09d3b3c0d0
21 changed files with 488 additions and 496 deletions


@ -1645,7 +1645,7 @@ void ActionsDAG::mergeNodes(ActionsDAG && second)
}
}
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes, bool create_split_nodes_mapping) const
{
/// Split the DAG into two parts.
/// (first_nodes, first_outputs) is the part which will contain split_nodes in its result.
@ -1779,13 +1779,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
}
/// Input from second DAG should also be in the first.
if (copy.type == ActionType::INPUT)
{
auto & input_copy = first_nodes.emplace_back(*cur.node);
assert(cur_data.to_first == nullptr);
cur_data.to_first = &input_copy;
new_inputs.push_back(cur.node);
}
// if (copy.type == ActionType::INPUT)
// {
// auto & input_copy = first_nodes.emplace_back(*cur.node);
// assert(cur_data.to_first == nullptr);
// cur_data.to_first = &input_copy;
// new_inputs.push_back(cur.node);
// }
}
else
{
@ -1804,10 +1804,11 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
/// If this node is needed in the result, add it as an input.
Node input_node;
input_node.type = ActionType::INPUT;
input_node.result_type = node.result_type;
input_node.result_name = node.result_name;
input_node.result_type = cur.node->result_type;
input_node.result_name = cur.node->result_name;
cur_data.to_second = &second_nodes.emplace_back(std::move(input_node));
if (cur.node->type != ActionType::INPUT)
new_inputs.push_back(cur.node);
}
}
@ -1824,16 +1825,31 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
for (const auto * input_node : inputs)
{
const auto & cur = data[input_node];
if (cur.to_first)
{
first_inputs.push_back(cur.to_first);
if (cur.to_second)
first_outputs.push_back(cur.to_first);
}
}
for (const auto * input : new_inputs)
{
const auto & cur = data[input];
if (cur.to_second)
second_inputs.push_back(cur.to_second);
if (cur.to_first)
first_outputs.push_back(cur.to_first);
}
for (const auto * input_node : inputs)
{
const auto & cur = data[input_node];
if (cur.to_second)
second_inputs.push_back(cur.to_second);
}
auto first_actions = std::make_shared<ActionsDAG>();
first_actions->nodes.swap(first_nodes);
first_actions->outputs.swap(first_outputs);
@ -1844,7 +1860,14 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
second_actions->outputs.swap(second_outputs);
second_actions->inputs.swap(second_inputs);
return {std::move(first_actions), std::move(second_actions)};
std::unordered_map<const Node *, const Node *> split_nodes_mapping;
if (create_split_nodes_mapping)
{
for (const auto * node : split_nodes)
split_nodes_mapping[node] = data[node].to_first;
}
return {std::move(first_actions), std::move(second_actions), std::move(split_nodes_mapping)};
}
ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const


@ -327,13 +327,18 @@ public:
/// Merge current nodes with specified dag nodes
void mergeNodes(ActionsDAG && second);
using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
struct SplitResult
{
ActionsDAGPtr first;
ActionsDAGPtr second;
std::unordered_map<const Node *, const Node *> split_nodes_mapping;
};
/// Split the ActionsDAG into two DAGs, where the first part contains all nodes from split_nodes and their children.
/// Executing the first part and then the second on a block is equivalent to executing the initial DAG.
/// The first DAG and the initial DAG have equal inputs; the second DAG and the initial DAG have equal outputs.
/// The second DAG may have fewer inputs than the first DAG (but may also include other columns).
SplitResult split(std::unordered_set<const Node *> split_nodes) const;
SplitResult split(std::unordered_set<const Node *> split_nodes, bool create_split_nodes_mapping = false) const;
/// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN.
SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
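For illustration, a minimal caller-side sketch of the new mapping flag; the names `dag` and `condition_node` are assumed here and are not part of this commit:

/// Hypothetical usage sketch of split() with create_split_nodes_mapping.
std::unordered_set<const ActionsDAG::Node *> to_split = {condition_node};
ActionsDAG::SplitResult split = dag->split(to_split, /*create_split_nodes_mapping=*/ true);
/// The mapping resolves each requested node to its clone inside split.first,
/// e.g. so the caller can register that clone as an output of the first DAG:
const ActionsDAG::Node * mapped = split.split_nodes_mapping.at(condition_node);
split.first->getOutputs().push_back(mapped);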


@ -66,7 +66,7 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
NameSet sort_columns;
for (const auto & col : sorting_step->getSortDescription())
sort_columns.insert(col.column_name);
auto [needed_for_sorting, unneeded_for_sorting] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
// No calculations can be postponed.
if (unneeded_for_sorting->trivial())


@ -5,68 +5,35 @@
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Interpreters/ActionsDAG.h>
#include <Planner/ActionsChain.h>
#include <deque>
#include "Functions/FunctionsLogical.h"
#include "Functions/IFunctionAdaptors.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
void matchDAGOutputNodesOrderWithHeader(ActionsDAGPtr & actions_dag, const Block & expected_header)
{
std::unordered_map<std::string, const ActionsDAG::Node *> output_name_to_node;
for (const auto * output_node : actions_dag->getOutputs())
output_name_to_node.emplace(output_node->result_name, output_node);
std::unordered_set<const ActionsDAG::Node *> used_output_nodes;
ActionsDAG::NodeRawConstPtrs updated_outputs;
updated_outputs.reserve(expected_header.columns());
for (const auto & column : expected_header)
{
auto output_node_it = output_name_to_node.find(column.name);
if (output_node_it == output_name_to_node.end())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Invalid move to PREWHERE optimization. Cannot find column {} in output",
column.name);
updated_outputs.push_back(output_node_it->second);
used_output_nodes.insert(output_node_it->second);
}
ActionsDAG::NodeRawConstPtrs unused_outputs;
for (const auto * output_node : actions_dag->getOutputs())
{
if (used_output_nodes.contains(output_node))
continue;
unused_outputs.push_back(output_node);
}
auto & actions_dag_outputs = actions_dag->getOutputs();
actions_dag_outputs = std::move(updated_outputs);
actions_dag_outputs.insert(actions_dag_outputs.end(), unused_outputs.begin(), unused_outputs.end());
}
}
namespace QueryPlanOptimizations
{
void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
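/// Remove the named column from the DAG outputs (only the first matching entry is erased).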
static void removeFromOutput(ActionsDAG & dag, const std::string name)
{
const auto * node = &dag.findInOutputs(name);
auto & outputs = dag.getOutputs();
for (size_t i = 0; i < outputs.size(); ++i)
{
if (node == outputs[i])
{
outputs.erase(outputs.begin() + i);
return;
}
}
}
void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
{
if (stack.size() < 3)
return;
const auto & frame = stack.back();
auto & frame = stack.back();
/** Assume that there are at least 3 nodes on the stack:
*
@ -82,60 +49,26 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
if (storage_prewhere_info && storage_prewhere_info->prewhere_actions)
return;
const QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node;
QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node;
const auto * filter_step = typeid_cast<FilterStep *>(filter_node->step.get());
if (!filter_step)
return;
/** Collect required filter output columns.
* Collect output nodes that are mapped to input nodes.
* Collect input node to output nodes mapping.
*/
ColumnsWithTypeAndName required_columns_after_filter;
std::unordered_set<std::string> output_nodes_mapped_to_input;
std::unordered_map<std::string, std::vector<std::string>> input_node_to_output_names;
for (const auto * output_node : filter_step->getExpression()->getOutputs())
{
const auto * node_without_alias = output_node;
while (node_without_alias->type == ActionsDAG::ActionType::ALIAS)
node_without_alias = node_without_alias->children[0];
if (node_without_alias->type == ActionsDAG::ActionType::INPUT)
{
output_nodes_mapped_to_input.emplace(output_node->result_name);
auto output_names_it = input_node_to_output_names.find(node_without_alias->result_name);
if (output_names_it == input_node_to_output_names.end())
{
auto [insert_it, _] = input_node_to_output_names.emplace(node_without_alias->result_name, std::vector<std::string>());
output_names_it = insert_it;
}
output_names_it->second.push_back(output_node->result_name);
}
if (output_node->result_name == filter_step->getFilterColumnName() && filter_step->removesFilterColumn())
continue;
required_columns_after_filter.push_back(ColumnWithTypeAndName(output_node->result_type, output_node->result_name));
}
const auto & context = read_from_merge_tree->getContext();
const auto & settings = context->getSettingsRef();
if (!settings.allow_experimental_analyzer)
return;
const auto & table_expression_modifiers = read_from_merge_tree->getQueryInfo().table_expression_modifiers;
bool is_final = table_expression_modifiers && table_expression_modifiers->hasFinal();
bool is_final = read_from_merge_tree->isQueryWithFinal();
bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final);
if (!optimize_move_to_prewhere)
return;
const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot();
if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio())
ColumnsWithTypeAndName required_columns_after_filter;
if (read_from_merge_tree->isQueryWithSampling())
{
const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey();
const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes();
@ -170,7 +103,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
filter_step->getFilterColumnName(),
read_from_merge_tree->getContext(),
is_final);
if (!optimize_result.has_value())
if (optimize_result.prewhere_nodes.empty())
return;
PrewhereInfoPtr prewhere_info;
@ -181,201 +115,85 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
prewhere_info->need_filter = true;
auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions;
auto filter_expression = filter_step->getExpression();
const auto & filter_column_name = filter_step->getFilterColumnName();
ActionsChain actions_chain;
std::string prewere_filter_node_name = prewhere_filter_actions->getOutputs().at(0)->result_name;
actions_chain.addStep(std::make_unique<ActionsChainStep>(prewhere_filter_actions));
auto & filter_actions = optimize_result->filter_actions;
/** The merge tree WHERE optimizer splits conjunctions in the filter expression into 2 parts:
* 1. Filter expressions.
* 2. Prewhere filter expressions.
*
* There can be cases when all expressions are moved to PREWHERE, but it is not
* enough to produce required filter output columns.
*
* Example: SELECT (a AND b) AS cond FROM test_table WHERE cond AND c;
* In this example condition expressions `a`, `b`, `c` can move to PREWHERE, but PREWHERE will not contain expression `and(a, b)`.
* It will contain only `a`, `b`, `c`, `and(a, b, c)` expressions.
*
* In such a scenario we need to create an additional step to calculate the `and(a, b)` expression after PREWHERE.
*/
bool need_additional_filter_after_prewhere = false;
if (!filter_actions)
if (optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn())
{
/// Any node from PREWHERE filter actions can be used as possible output node
std::unordered_set<std::string> possible_prewhere_output_nodes;
for (const auto & node : prewhere_filter_actions->getNodes())
possible_prewhere_output_nodes.insert(node.result_name);
removeFromOutput(*filter_expression, filter_column_name);
auto & outputs = filter_expression->getOutputs();
size_t size = outputs.size();
outputs.insert(outputs.end(), optimize_result.prewhere_nodes.begin(), optimize_result.prewhere_nodes.end());
filter_expression->removeUnusedActions(false);
outputs.resize(size);
}
for (auto & required_column : required_columns_after_filter)
auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true);
/// This is a leak of abstraction.
/// Split actions may have inputs which are needed only for PREWHERE.
/// This is fine for ActionsDAG to have such a split, but it breaks defaults calculation.
///
/// See 00950_default_prewhere for an example.
/// The table has structure `APIKey UInt8, SessionType UInt8` and default `OperatingSystem = SessionType + 1`.
/// For a query with `SELECT OperatingSystem WHERE APIKey = 42 AND SessionType = 42` we push everything to PREWHERE,
/// and columns APIKey, SessionType are removed from the inputs (because only OperatingSystem is needed).
/// However, column OperatingSystem is calculated after the PREWHERE stage, based on the SessionType value.
/// If column SessionType is removed by PREWHERE actions, we use zero as the default and get a wrong result.
///
/// So, here we restore the removed inputs for PREWHERE actions.
{
if (!possible_prewhere_output_nodes.contains(required_column.name) &&
!output_nodes_mapped_to_input.contains(required_column.name))
std::unordered_set<const ActionsDAG::Node *> first_outputs(split_result.first->getOutputs().begin(), split_result.first->getOutputs().end());
for (const auto * input : split_result.first->getInputs())
{
need_additional_filter_after_prewhere = true;
break;
if (!first_outputs.contains(input))
{
split_result.first->getOutputs().push_back(input);
/// Add column to second actions as input.
/// Do not add it to result, so it would be removed.
split_result.second->addInput(input->result_name, input->result_type);
}
}
}
/** If there are additional filter actions after the PREWHERE filter actions, we create a filter actions DAG
* that uses the PREWHERE filter actions' output columns as its inputs.
* Then we merge the nodes of this filter actions DAG with the old filter step's DAG nodes,
* to reuse some expressions from the PREWHERE filter actions.
*/
if (need_additional_filter_after_prewhere || filter_actions)
ActionsDAG::NodeRawConstPtrs conditions;
conditions.reserve(split_result.split_nodes_mapping.size());
for (const auto * condition : optimize_result.prewhere_nodes)
conditions.push_back(split_result.split_nodes_mapping.at(condition));
prewhere_info->prewhere_actions = std::move(split_result.first);
prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn();
if (conditions.size() == 1)
{
auto merged_filter_actions = std::make_shared<ActionsDAG>(actions_chain.getLastStepAvailableOutputColumns());
merged_filter_actions->getOutputs().clear();
merged_filter_actions->mergeNodes(std::move(*filter_step->getExpression()->clone()));
/// Add old filter step filter column to outputs
for (const auto & node : merged_filter_actions->getNodes())
prewhere_info->prewhere_column_name = conditions.front()->result_name;
prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front());
}
else
{
if (node.result_name == filter_step->getFilterColumnName())
{
merged_filter_actions->getOutputs().push_back(&node);
break;
prewhere_info->remove_prewhere_column = true;
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {});
prewhere_info->prewhere_column_name = node->result_name;
prewhere_info->prewhere_actions->getOutputs().push_back(node);
}
}
filter_actions = std::move(merged_filter_actions);
/// If there is a filter after PREWHERE, we can skip filtering during the PREWHERE stage
prewhere_info->need_filter = false;
actions_chain.addStep(std::make_unique<ActionsChainStep>(filter_actions));
}
auto required_output_actions = std::make_shared<ActionsDAG>(required_columns_after_filter);
actions_chain.addStep(std::make_unique<ActionsChainStep>(required_output_actions));
actions_chain.finalize();
prewhere_filter_actions->projectInput(false);
auto & prewhere_actions_chain_node = actions_chain[0];
prewhere_info->prewhere_actions = std::move(prewhere_filter_actions);
prewhere_info->prewhere_column_name = prewere_filter_node_name;
prewhere_info->remove_prewhere_column = !prewhere_actions_chain_node->getChildRequiredOutputColumnsNames().contains(prewere_filter_node_name);
read_from_merge_tree->updatePrewhereInfo(prewhere_info);
QueryPlan::Node * replace_old_filter_node = nullptr;
bool remove_filter_node = false;
if (filter_actions)
if (!optimize_result.fully_moved_to_prewhere)
{
filter_actions->projectInput(false);
/// Match dag output nodes with old filter step header
matchDAGOutputNodesOrderWithHeader(filter_actions, filter_step->getOutputStream().header);
auto & filter_actions_chain_node = actions_chain[1];
bool remove_filter_column = !filter_actions_chain_node->getChildRequiredOutputColumnsNames().contains(filter_step->getFilterColumnName());
auto after_prewhere_filter_step = std::make_unique<FilterStep>(read_from_merge_tree->getOutputStream(),
filter_actions,
filter_node->step = std::make_unique<FilterStep>(
read_from_merge_tree->getOutputStream(),
std::move(split_result.second),
filter_step->getFilterColumnName(),
remove_filter_column);
auto & node = nodes.emplace_back();
node.children.emplace_back(frame.node);
node.step = std::move(after_prewhere_filter_step);
replace_old_filter_node = &node;
filter_step->removesFilterColumn());
}
else
{
auto rename_actions_dag = std::make_shared<ActionsDAG>(read_from_merge_tree->getOutputStream().header.getColumnsWithTypeAndName());
bool apply_rename_step = false;
ActionsDAG::NodeRawConstPtrs updated_outputs;
/** If the output after reading from the merge tree contains column names without aliases,
* apply the old filter step's aliases to them.
*/
for (const auto * output_node : rename_actions_dag->getOutputs())
{
const auto alias_it = input_node_to_output_names.find(output_node->result_name);
if (alias_it == input_node_to_output_names.end())
{
updated_outputs.push_back(output_node);
continue;
}
for (auto & output_name : alias_it->second)
{
if (output_name == output_node->result_name)
{
updated_outputs.push_back(output_node);
continue;
}
updated_outputs.push_back(&rename_actions_dag->addAlias(*output_node, output_name));
apply_rename_step = true;
}
}
rename_actions_dag->getOutputs() = std::move(updated_outputs);
bool apply_match_step = false;
/// If the column order does not match the old filter step's column order, match the DAG output nodes with the header
if (!blocksHaveEqualStructure(read_from_merge_tree->getOutputStream().header, filter_step->getOutputStream().header))
{
apply_match_step = true;
matchDAGOutputNodesOrderWithHeader(rename_actions_dag, filter_step->getOutputStream().header);
}
if (apply_rename_step || apply_match_step)
{
auto rename_step = std::make_unique<ExpressionStep>(read_from_merge_tree->getOutputStream(), rename_actions_dag);
if (apply_rename_step)
rename_step->setStepDescription("Change column names to column identifiers");
auto & node = nodes.emplace_back();
node.children.emplace_back(frame.node);
node.step = std::move(rename_step);
replace_old_filter_node = &node;
}
else
{
replace_old_filter_node = frame.node;
remove_filter_node = true;
}
}
QueryPlan::Node * filter_parent_node = (stack.rbegin() + 2)->node;
for (auto & filter_parent_child : filter_parent_node->children)
{
if (filter_parent_child == filter_node)
{
filter_parent_child = replace_old_filter_node;
size_t stack_size = stack.size();
/** If the filter step is completely replaced with PREWHERE filter actions, remove it from the stack.
* Otherwise, replace the old filter step with the new filter step after PREWHERE.
*/
if (remove_filter_node)
{
std::swap(stack[stack_size - 1], stack[stack_size - 2]);
stack.pop_back();
}
else
{
stack[stack_size - 2] = Frame{.node = replace_old_filter_node, .next_child = 1};
}
break;
}
filter_node->step = std::make_unique<ExpressionStep>(
read_from_merge_tree->getOutputStream(),
std::move(split_result.second));
}
}
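For orientation, a hedged recap of the PrewhereInfo contract assembled above, assuming a `split_result` and a combined `condition_node` as in the code; this is a sketch, not part of the commit:

/// Hypothetical recap of the fields set by optimizePrewhere.
auto info = std::make_shared<PrewhereInfo>();
info->prewhere_actions = std::move(split_result.first);    /// DAG that computes the filter column
info->prewhere_column_name = condition_node->result_name;  /// column the reader filters by
info->remove_prewhere_column = true;                       /// drop that column after filtering
info->need_filter = true;                                  /// rows must actually be filtered
read_from_merge_tree->updatePrewhereInfo(info);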


@ -118,6 +118,34 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
optimizePrewhere(stack, nodes);
optimizePrimaryKeyCondition(stack);
auto & frame = stack.back();
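/// frame.next_child == 0 means this is the first visit of the node, before any of its children were processed.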
if (frame.next_child == 0)
{
if (optimization_settings.read_in_order)
optimizeReadInOrder(*frame.node, nodes);
if (optimization_settings.distinct_in_order)
tryDistinctReadInOrder(frame.node);
}
/// Traverse all children first.
if (frame.next_child < frame.node->children.size())
{
auto next_frame = Frame{.node = frame.node->children[frame.next_child]};
++frame.next_child;
stack.push_back(next_frame);
continue;
}
stack.pop_back();
}
stack.push_back({.node = &root});
while (!stack.empty())
{
{
/// NOTE: frame cannot be safely used after the stack is modified.
auto & frame = stack.back();
@ -126,19 +154,14 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
{
has_reading_from_mt |= typeid_cast<const ReadFromMergeTree *>(frame.node->step.get()) != nullptr;
if (optimization_settings.read_in_order)
optimizeReadInOrder(*frame.node, nodes);
/// Projection optimization relies on PK optimization
if (optimization_settings.optimize_projection)
num_applied_projection
+= optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections);
if (optimization_settings.aggregation_in_order)
optimizeAggregationInOrder(*frame.node, nodes);
if (optimization_settings.distinct_in_order)
tryDistinctReadInOrder(frame.node);
}
/// Traverse all children first.
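Both loops follow the same explicit-stack DFS pattern, applying some optimizations on the first visit of a node and others after all children were processed; a minimal sketch, with visitPreOrder and visitPostOrder as assumed placeholder names:

/// Hypothetical sketch of the traversal pattern, not part of this commit.
std::vector<Frame> dfs_stack;
dfs_stack.push_back({.node = &root});
while (!dfs_stack.empty())
{
    auto & frame = dfs_stack.back();
    if (frame.next_child == 0)
        visitPreOrder(*frame.node);      /// runs once, before any child is processed
    if (frame.next_child < frame.node->children.size())
    {
        auto next_frame = Frame{.node = frame.node->children[frame.next_child]};
        ++frame.next_child;
        dfs_stack.push_back(next_frame); /// NOTE: frame may dangle after this push
        continue;
    }
    visitPostOrder(*frame.node);         /// runs after all children were processed
    dfs_stack.pop_back();
}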


@ -14,19 +14,33 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
return 0;
const auto & expr = filter_step->getExpression();
const std::string & filter_column_name = filter_step->getFilterColumnName();
/// Do not split if there are functions like runningDifference.
if (expr->hasStatefulFunctions())
return 0;
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
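/// If the filter column shares its name with an input column, the computed filter column in the first half
/// of the split could collide with the pass-through input column needed by the second half;
/// in that case the filter column is renamed below.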
bool filter_name_clashs_with_input = false;
if (filter_step->removesFilterColumn())
{
for (const auto * input : expr->getInputs())
{
if (input->result_name == filter_column_name)
{
filter_name_clashs_with_input = true;
break;
}
}
}
auto split = expr->splitActionsForFilter(filter_column_name);
if (split.second->trivial())
return 0;
bool remove_filter = false;
if (filter_step->removesFilterColumn())
remove_filter = split.second->removeUnusedResult(filter_step->getFilterColumnName());
remove_filter = split.second->removeUnusedResult(filter_column_name);
auto description = filter_step->getStepDescription();
@ -34,10 +48,25 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
node->children.swap(filter_node.children);
node->children.push_back(&filter_node);
std::string split_filter_name = filter_column_name;
if (filter_name_clashs_with_input)
{
split_filter_name = "__split_filter";
for (auto & filter_output : split.first->getOutputs())
{
if (filter_output->result_name == filter_column_name)
{
filter_output = &split.first->addAlias(*filter_output, split_filter_name);
break;
}
}
}
filter_node.step = std::make_unique<FilterStep>(
filter_node.children.at(0)->step->getOutputStream(),
std::move(split.first),
filter_step->getFilterColumnName(),
std::move(split_filter_name),
remove_filter);
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));


@ -89,6 +89,34 @@ size_t countPartitions(const MergeTreeData::DataPartsVector & prepared_parts)
return countPartitions(prepared_parts, get_partition_id);
}
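/// Add to the DAG outputs any of its inputs whose names are in `inputs` and which are not already outputs; returns true if something was added.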
bool restoreDAGInputs(ActionsDAG & dag, const NameSet & inputs)
{
std::unordered_set<const ActionsDAG::Node *> outputs(dag.getOutputs().begin(), dag.getOutputs().end());
bool added = false;
for (const auto * input : dag.getInputs())
{
if (inputs.contains(input->result_name) && !outputs.contains(input))
{
dag.getOutputs().push_back(input);
added = true;
}
}
return added;
}
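/// The same restoration, applied to both the row-level filter and the PREWHERE actions of the given PrewhereInfo.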
bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs)
{
bool added = false;
/// Use |= rather than short-circuiting ||, so the PREWHERE actions are restored even if the row-level filter already added something.
if (info.row_level_filter)
added |= restoreDAGInputs(*info.row_level_filter, inputs);
if (info.prewhere_actions)
added |= restoreDAGInputs(*info.prewhere_actions, inputs);
return added;
}
}
namespace ProfileEvents
@ -786,18 +814,13 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
/// To fix this, we prohibit removing any input in prewhere actions. Instead, projection actions will be added after sorting.
/// See 02354_read_in_order_prewhere.sql as an example.
bool have_input_columns_removed_after_prewhere = false;
if (prewhere_info && prewhere_info->prewhere_actions)
if (prewhere_info)
{
auto & outputs = prewhere_info->prewhere_actions->getOutputs();
std::unordered_set<const ActionsDAG::Node *> outputs_set(outputs.begin(), outputs.end());
for (const auto * input : prewhere_info->prewhere_actions->getInputs())
{
if (!outputs_set.contains(input))
{
outputs.push_back(input);
have_input_columns_removed_after_prewhere = true;
}
}
NameSet sorting_columns;
for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes())
sorting_columns.insert(column.name);
have_input_columns_removed_after_prewhere = restorePrewhereInputs(*prewhere_info, sorting_columns);
}
/// Let's split ranges to avoid reading much data.
@ -984,7 +1007,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
/// Thus we need to merge all partition parts into a single sorted stream.
Pipe pipe = Pipe::unitePipes(std::move(pipes));
merge_streams(pipe);
out_projection = createProjection(pipe_header);
return pipe;
}
@ -1133,6 +1155,14 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
auto sorting_expr = std::make_shared<ExpressionActions>(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone());
if (prewhere_info)
{
NameSet sorting_columns;
for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes())
sorting_columns.insert(column.name);
restorePrewhereInputs(*prewhere_info, sorting_columns);
}
for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index)
{
/// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
@ -1804,13 +1834,20 @@ Pipe ReadFromMergeTree::spreadMarkRanges(
if (!final && result.sampling.use_sampling)
{
NameSet sampling_columns;
/// Add columns needed for `sample_by_ast` to `column_names_to_read`.
/// Skip this if final was used, because such columns were already added from PK.
for (const auto & column : result.sampling.filter_expression->getRequiredColumns().getNames())
{
if (!names.contains(column))
column_names_to_read.push_back(column);
sampling_columns.insert(column);
}
if (prewhere_info)
restorePrewhereInputs(*prewhere_info, sampling_columns);
}
if (final)
@ -2004,6 +2041,24 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
});
}
/// Some extra columns could be added by sampling, FINAL, in-order reading, etc.
/// Remove them from the header if they are not needed.
if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputStream().header))
{
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
pipe.getHeader().getColumnsWithTypeAndName(),
getOutputStream().header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Name,
true);
auto converting_dag_expr = std::make_shared<ExpressionActions>(convert_actions_dag);
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, converting_dag_expr);
});
}
for (const auto & processor : pipe.getProcessors())
processors.emplace_back(processor);


@ -449,8 +449,8 @@ Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const Prewhere
Block MergeTreeSelectProcessor::transformHeader(
Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns)
{
injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns);
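/// Apply PREWHERE to the header first; virtual columns are injected into the already transformed header afterwards.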
auto transformed = applyPrewhereActions(std::move(block), prewhere_info);
injectVirtualColumns(transformed, 0, nullptr, partition_value_type, virtual_columns);
return transformed;
}


@ -112,7 +112,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons
LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition \"{}\" moved to PREWHERE", select.prewhere()->formatForLogging(log_queries_cut_to_length));
}
std::optional<MergeTreeWhereOptimizer::FilterActionsOptimizeResult> MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag,
MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag,
const std::string & filter_column_name,
const ContextPtr & context,
bool is_final)
@ -132,11 +132,14 @@ std::optional<MergeTreeWhereOptimizer::FilterActionsOptimizeResult> MergeTreeWhe
if (!optimize_result)
return {};
auto filter_actions = reconstructDAG(optimize_result->where_conditions);
auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions);
// if (optimize_result->where_conditions.empty())
// return {.prewhere_nodes = {}, .fully_moved_to_prewhere = true};
FilterActionsOptimizeResult result = { std::move(filter_actions), std::move(prewhere_filter_actions) };
return result;
std::unordered_set<const ActionsDAG::Node *> prewhere_conditions;
for (const auto & condition : optimize_result->prewhere_conditions)
prewhere_conditions.insert(condition.node.getDAGNode());
return {.prewhere_nodes = std::move(prewhere_conditions), .fully_moved_to_prewhere = optimize_result->where_conditions.empty()};
}
static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & columns_names, NameSet & result_set, bool & has_invalid_column)
@ -343,20 +346,6 @@ ASTPtr MergeTreeWhereOptimizer::reconstructAST(const Conditions & conditions)
return function;
}
ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions)
{
if (conditions.empty())
return {};
ActionsDAG::NodeRawConstPtrs filter_nodes;
filter_nodes.reserve(conditions.size());
for (const auto & condition : conditions)
filter_nodes.push_back(condition.node.getDAGNode());
return ActionsDAG::buildFilterActionsDAG(filter_nodes);
}
std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::optimizeImpl(const RPNBuilderTreeNode & node,
const WhereOptimizerContext & where_optimizer_context) const
{


@ -47,11 +47,11 @@ public:
struct FilterActionsOptimizeResult
{
ActionsDAGPtr filter_actions;
ActionsDAGPtr prewhere_filter_actions;
std::unordered_set<const ActionsDAG::Node *> prewhere_nodes;
bool fully_moved_to_prewhere = false;
};
std::optional<FilterActionsOptimizeResult> optimize(const ActionsDAGPtr & filter_dag,
FilterActionsOptimizeResult optimize(const ActionsDAGPtr & filter_dag,
const std::string & filter_column_name,
const ContextPtr & context,
bool is_final);
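A hedged caller-side sketch of the new result contract; where_optimizer and the other names are assumed for illustration:

/// Hypothetical sketch, not part of this commit.
FilterActionsOptimizeResult result = where_optimizer.optimize(filter_dag, filter_column_name, context, is_final);
if (result.prewhere_nodes.empty())
    return; /// nothing could be moved to PREWHERE
if (result.fully_moved_to_prewhere)
{
    /// Every WHERE conjunct became a PREWHERE condition, so the remaining
    /// filter step can be replaced by a plain expression step.
}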
@ -122,9 +122,6 @@ private:
/// Reconstruct AST from conditions
static ASTPtr reconstructAST(const Conditions & conditions);
/// Reconstruct DAG from conditions
static ActionsDAGPtr reconstructDAG(const Conditions & conditions);
void optimizeArbitrary(ASTSelectQuery & select) const;
UInt64 getColumnsSize(const NameSet & columns) const;


@ -28,7 +28,7 @@ Expression ((Projection + Before ORDER BY))
Expression ((Project names + Projection))
Filter ((WHERE + DROP unused columns after JOIN))
Join (JOIN FillRightFirst)
Expression (Change column names to column identifiers)
Expression
ReadFromMergeTree (default.t1)
Indexes:
PrimaryKey


@ -3,21 +3,18 @@
MinMax
Keys:
y
Condition: (y in [1, +Inf))
Parts: 4/5
Granules: 11/12
Partition
Keys:
y
bitAnd(z, 3)
Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))
Parts: 3/4
Granules: 10/11
PrimaryKey
Keys:
x
y
Condition: and((x in [11, +Inf)), (y in [1, +Inf)))
Parts: 2/3
Granules: 6/10
Skip
@ -37,7 +34,6 @@
{
"Type": "MinMax",
"Keys": ["y"],
"Condition": "(y in [1, +Inf))",
"Initial Parts": 5,
"Selected Parts": 4,
"Initial Granules": 12,
@ -46,7 +42,6 @@
{
"Type": "Partition",
"Keys": ["y", "bitAnd(z, 3)"],
"Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))",
"Initial Parts": 4,
"Selected Parts": 3,
"Initial Granules": 11,
@ -55,7 +50,6 @@
{
"Type": "PrimaryKey",
"Keys": ["x", "y"],
"Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))",
"Initial Parts": 3,
"Selected Parts": 2,
"Initial Granules": 10,
@ -109,21 +103,18 @@
MinMax
Keys:
y
Condition: (y in [1, +Inf))
Parts: 4/5
Granules: 11/12
Partition
Keys:
y
bitAnd(z, 3)
Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))
Parts: 3/4
Granules: 10/11
PrimaryKey
Keys:
x
y
Condition: and((x in [11, +Inf)), (y in [1, +Inf)))
Parts: 2/3
Granules: 6/10
Skip
@ -138,7 +129,6 @@
{
"Type": "MinMax",
"Keys": ["y"],
"Condition": "(y in [1, +Inf))",
"Initial Parts": 5,
"Selected Parts": 4,
"Initial Granules": 12,
@ -147,7 +137,6 @@
{
"Type": "Partition",
"Keys": ["y", "bitAnd(z, 3)"],
"Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))",
"Initial Parts": 4,
"Selected Parts": 3,
"Initial Granules": 11,
@ -156,7 +145,6 @@
{
"Type": "PrimaryKey",
"Keys": ["x", "y"],
"Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))",
"Initial Parts": 3,
"Selected Parts": 2,
"Initial Granules": 10,


@ -17,13 +17,13 @@ do
$CH_CLIENT -q "
explain indexes = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14;
" | grep -A 100 "ReadFromMergeTree" # | grep -v "Description"
" | grep -A 100 "ReadFromMergeTree" | grep -v "Condition"
echo "-----------------"
$CH_CLIENT -q "
explain indexes = 1, json = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14 format TSVRaw;
" | grep -A 100 "ReadFromMergeTree" # | grep -v "Description"
" | grep -A 100 "ReadFromMergeTree" | grep -v "Condition"
echo "-----------------"


@ -76,7 +76,6 @@ ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
ExpressionTransform
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
2020-10-11 0 0
2020-10-11 0 10
@ -106,7 +105,6 @@ ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
ExpressionTransform
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
2020-10-12 0
2020-10-12 1
@ -140,7 +138,6 @@ ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
ExpressionTransform
ReverseTransform
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1
2020-10-12 99999


@ -52,7 +52,7 @@ SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000
SELECT 'PREWHERE';
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000;
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 }
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10 }
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10, 16 }
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10, 16 }
SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3;
SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3;


@ -1,3 +1,5 @@
-- { echoOn }
explain pipeline select a from t1 group by a;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -15,6 +17,8 @@ ExpressionTransform × 16
Resize 3 → 1
MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 3 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t2 group by a;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -40,6 +44,8 @@ ExpressionTransform × 16
Resize 2 → 1
MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 2 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t3 group by a;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -82,6 +88,8 @@ ExpressionTransform × 16
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
1000000
-- { echoOn }
explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -91,7 +99,6 @@ ExpressionTransform × 16
(Expression)
ExpressionTransform × 4
(ReadFromMergeTree)
ExpressionTransform × 4
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
@ -105,6 +112,8 @@ ExpressionTransform × 16
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -114,7 +123,6 @@ ExpressionTransform × 16
(Expression)
ExpressionTransform × 8
(ReadFromMergeTree)
ExpressionTransform × 8
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
@ -140,6 +148,8 @@ ExpressionTransform × 16
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -148,7 +158,6 @@ ExpressionTransform × 16
(Expression)
ExpressionTransform × 16
(ReadFromMergeTree)
ExpressionTransform × 16
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1


@ -15,7 +15,9 @@ system stop merges t1;
insert into t1 select number from numbers_mt(1e6);
insert into t1 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t1 group by a;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t1 group by a);
@ -28,7 +30,9 @@ system stop merges t2;
insert into t2 select number from numbers_mt(1e6);
insert into t2 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t2 group by a;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t2 group by a);
@ -41,7 +45,9 @@ system stop merges t3;
insert into t3 select number from numbers_mt(1e6);
insert into t3 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t3 group by a;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t3 group by a);
@ -63,7 +69,9 @@ system stop merges t4;
insert into t4 select number from numbers_mt(1e6);
insert into t4 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t4 group by a);
@ -76,7 +84,9 @@ system stop merges t5;
insert into t5 select number from numbers_mt(1e6);
insert into t5 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t5 group by a);
@ -89,7 +99,9 @@ system stop merges t6;
insert into t6 select number from numbers_mt(1e6);
insert into t6 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t6 group by a);


@ -38,3 +38,40 @@
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
ReadFromMergeTree (default.data_02771)
Indexes:
PrimaryKey
Condition: true
Parts: 1/1
Granules: 1/1
Skip
Name: x_idx
Description: minmax GRANULARITY 1
Parts: 0/1
Granules: 0/1
Skip
Name: y_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
Skip
Name: xy_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
ReadFromMergeTree (default.data_02771)
Indexes:
PrimaryKey
Condition: true
Parts: 1/1
Granules: 1/1
Skip
Name: x_idx
Description: minmax GRANULARITY 1
Parts: 0/1
Granules: 0/1
Skip
Name: y_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0


@ -1,5 +1,3 @@
SET allow_experimental_analyzer = 0;
DROP TABLE IF EXISTS data_02771;
@ -24,6 +22,14 @@ SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='na_idx';
SELECT * FROM data_02771 WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- { serverError 277 }
SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
SET allow_experimental_analyzer = 0;
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
SET allow_experimental_analyzer = 1;
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';


@ -1,6 +1,6 @@
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -9,9 +9,9 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -20,9 +20,9 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -31,9 +31,9 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -42,47 +42,51 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Condition: and((value in 1-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Condition: and((value in 1-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Condition: and((value in 5-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Condition: and((value in 5-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1


@ -7,18 +7,18 @@ CREATE TABLE test_table
INSERT INTO test_table SELECT number, number FROM numbers(10);
SET allow_experimental_analyzer = 0;
set allow_experimental_analyzer = 0;
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
SET allow_experimental_analyzer = 1;
set allow_experimental_analyzer = 1;
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
DROP TABLE test_table;