From 7e510d9602d089241203f4e5e979f43996f664c8 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Tue, 22 Aug 2023 16:24:41 +0000
Subject: [PATCH 001/204] Analyzer: always qualify execution names

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp |  2 +
 src/Analyzer/createUniqueTableAliases.cpp | 62 +++++++++++++++++++++++
 src/Analyzer/createUniqueTableAliases.h   | 18 +++++++
 src/Planner/PlannerContext.cpp            |  9 ++--
 4 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 src/Analyzer/createUniqueTableAliases.cpp
 create mode 100644 src/Analyzer/createUniqueTableAliases.h
diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 0c3dc5e70d7..1803cc901c5 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -51,6 +51,7 @@
 
 #include <Processors/Executors/PullingAsyncPipelineExecutor.h>
 
+#include <Analyzer/createUniqueTableAliases.h>
 #include <Analyzer/Utils.h>
 #include <Analyzer/SetUtils.h>
 #include <Analyzer/AggregationUtils.h>
@@ -7174,6 +7175,7 @@ void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context
 {
     QueryAnalyzer analyzer;
     analyzer.resolve(query_tree_node, table_expression, context);
+    createUniqueTableAliases(query_tree_node, context);
 }
 
 }
diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
new file mode 100644
index 00000000000..bd97a7d69aa
--- /dev/null
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -0,0 +1,62 @@
+#include <unordered_map>
+#include <Analyzer/createUniqueTableAliases.h>
+#include <Analyzer/InDepthQueryTreeVisitor.h>
+#include <Analyzer/IQueryTreeNode.h>
+
+namespace DB
+{
+
+namespace
+{
+
+class CreateUniqueTableAliasesVisitor : public InDepthQueryTreeVisitorWithContext<CreateUniqueTableAliasesVisitor>
+{
+public:
+    using Base = InDepthQueryTreeVisitorWithContext<CreateUniqueTableAliasesVisitor>;
+
+    explicit CreateUniqueTableAliasesVisitor(const ContextPtr & context)
+        : Base(context)
+    {}
+
+    void enterImpl(QueryTreeNodePtr & node)
+    {
+        switch (node->getNodeType())
+        {
+            case QueryTreeNodeType::QUERY:
+                [[fallthrough]];
+            case QueryTreeNodeType::UNION:
+                if (getSubqueryDepth() == 1)
+                    break;
+                [[fallthrough]];
+            case QueryTreeNodeType::TABLE:
+                [[fallthrough]];
+            case QueryTreeNodeType::TABLE_FUNCTION:
+                [[fallthrough]];
+            case QueryTreeNodeType::ARRAY_JOIN:
+            {
+                auto & alias = table_expression_to_alias[node];
+                if (alias.empty())
+                {
+                    alias = fmt::format("__table{}", table_expression_to_alias.size());
+                    node->setAlias(alias);
+                }
+                break;
+            }
+            default:
+                break;
+        }
+    }
+private:
+    // We need to use raw pointer as a key, not a QueryTreeNodePtrWithHash.
+    std::unordered_map<QueryTreeNodePtr, String> table_expression_to_alias;
+};
+
+}
+
+
+void createUniqueTableAliases(QueryTreeNodePtr & node, const ContextPtr & context)
+{
+    CreateUniqueTableAliasesVisitor(context).visit(node);
+}
+
+}
diff --git a/src/Analyzer/createUniqueTableAliases.h b/src/Analyzer/createUniqueTableAliases.h
new file mode 100644
index 00000000000..815fafaebca
--- /dev/null
+++ b/src/Analyzer/createUniqueTableAliases.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <memory>
+#include <Interpreters/Context_fwd.h>
+
+class IQueryTreeNode;
+using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;
+
+namespace DB
+{
+
+/*
+ * For each table expression in the Query Tree generate and add a unique alias.
+ * If table expression had an alias in initial query tree, override it.
+ */
+void createUniqueTableAliases(QueryTreeNodePtr & node, const ContextPtr & context);
+
+}
diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp
index 0fde034b87a..422c8c1d01f 100644
--- a/src/Planner/PlannerContext.cpp
+++ b/src/Planner/PlannerContext.cpp
@@ -20,12 +20,15 @@ const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const Quer
     return createColumnIdentifier(column_node_typed.getColumn(), column_source_node);
 }
 
-const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const NameAndTypePair & column, const QueryTreeNodePtr & /*column_source_node*/)
+const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const NameAndTypePair & column, const QueryTreeNodePtr & column_source_node)
 {
     std::string column_identifier;
 
-    column_identifier += column.name;
-    column_identifier += '_' + std::to_string(column_identifiers.size());
+    const auto & source_alias = column_source_node->getAlias();
+    if (!source_alias.empty())
+        column_identifier = source_alias + "." + column.name;
+    else
+        column_identifier = column.name;
 
     auto [it, inserted] = column_identifiers.emplace(column_identifier);
     assert(inserted);

From 7f916c7069338ae07a5e7a38c8d4c0d6cd91c09b Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Tue, 22 Aug 2023 17:14:24 +0000
Subject: [PATCH 002/204] Fix table aliases for the queries sent to the shards

---
 src/Analyzer/createUniqueTableAliases.cpp | 5 +++++
 src/Storages/buildQueryTreeForShard.cpp   | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index bd97a7d69aa..782f2f35749 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -25,9 +25,14 @@ public:
             case QueryTreeNodeType::QUERY:
                 [[fallthrough]];
             case QueryTreeNodeType::UNION:
+            {
                 if (getSubqueryDepth() == 1)
+                {
+                    node->removeAlias();
                     break;
+                }
                 [[fallthrough]];
+            }
             case QueryTreeNodeType::TABLE:
                 [[fallthrough]];
             case QueryTreeNodeType::TABLE_FUNCTION:
diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp
index 74f2709f458..3bbb1a584ef 100644
--- a/src/Storages/buildQueryTreeForShard.cpp
+++ b/src/Storages/buildQueryTreeForShard.cpp
@@ -1,6 +1,7 @@
 
 #include <Storages/buildQueryTreeForShard.h>
 
+#include <Analyzer/createUniqueTableAliases.h>
 #include <Analyzer/ColumnNode.h>
 #include <Analyzer/FunctionNode.h>
 #include <Analyzer/IQueryTreeNode.h>
@@ -372,6 +373,8 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeN
 
     removeGroupingFunctionSpecializations(query_tree_to_modify);
 
+    createUniqueTableAliases(query_tree_to_modify, planner_context->getQueryContext());
+
     return query_tree_to_modify;
 }
 

From aec0f3ede3a2bba1a800e643b999de064977e12a Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 23 Aug 2023 16:05:03 +0000
Subject: [PATCH 003/204] Do not add pointless aliases to ARRAY JOIN

---
 src/Analyzer/createUniqueTableAliases.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index 782f2f35749..d94ea250450 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -36,8 +36,6 @@ public:
             case QueryTreeNodeType::TABLE:
                 [[fallthrough]];
             case QueryTreeNodeType::TABLE_FUNCTION:
-                [[fallthrough]];
-            case QueryTreeNodeType::ARRAY_JOIN:
             {
                 auto & alias = table_expression_to_alias[node];
                 if (alias.empty())

From b9a2a1b51c45a247a109199a7e2f1733646359bc Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 23 Aug 2023 16:06:14 +0000
Subject: [PATCH 004/204] Update reference files

---
 ...00621_regression_for_in_operator.reference |   4 +-
 ...01300_group_by_other_keys_having.reference |   8 +-
 ..._redundant_functions_in_order_by.reference |  28 ++--
 .../01455_opentelemetry_distributed.reference |   4 +-
 ...2_constraints_where_optimization.reference |  12 +-
 ...ns_optimize_read_in_window_order.reference |  10 +-
 .../02366_explain_query_tree.reference        |   6 +-
 .../02381_join_dup_columns_in_plan.reference  |  44 +++---
 .../0_stateless/02421_explain_subquery.sql    |   2 +-
 .../02476_fuse_sum_count.reference            |  16 +--
 .../02477_fuse_quantiles.reference            |   6 +-
 .../02481_aggregation_in_order_plan.reference |   2 +-
 ...optimize_aggregation_arithmetics.reference |   4 +-
 ...yzer_optimize_grouping_sets_keys.reference |   8 +-
 ..._injective_functions_elimination.reference |   2 +-
 ...497_if_transform_strings_to_enum.reference |  28 ++--
 .../02514_analyzer_drop_join_on.reference     | 134 +++++++++---------
 ...write_aggregate_function_with_if.reference |  32 ++---
 ...02534_analyzer_grouping_function.reference |  10 +-
 ...imizer_removing_redundant_checks.reference |  10 +-
 ...702_logical_optimizer_with_nulls.reference |   6 +-
 ...02771_parallel_replicas_analyzer.reference |   2 +-
 ...mizations_ast_query_tree_rewrite.reference |  56 ++++----
 23 files changed, 217 insertions(+), 217 deletions(-)

diff --git a/tests/queries/0_stateless/00621_regression_for_in_operator.reference b/tests/queries/0_stateless/00621_regression_for_in_operator.reference
index ab8bcf307eb..b68f550a742 100644
--- a/tests/queries/0_stateless/00621_regression_for_in_operator.reference
+++ b/tests/queries/0_stateless/00621_regression_for_in_operator.reference
@@ -10,7 +10,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.regression_for_in_operator_view
+    TABLE id: 3, alias: __table1, table_name: default.regression_for_in_operator_view
   WHERE
     FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -27,7 +27,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.regression_for_in_operator_view
+    TABLE id: 3, alias: __table1, table_name: default.regression_for_in_operator_view
   WHERE
     FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8
       ARGUMENTS
diff --git a/tests/queries/0_stateless/01300_group_by_other_keys_having.reference b/tests/queries/0_stateless/01300_group_by_other_keys_having.reference
index a9be79800c1..f861da3da2b 100644
--- a/tests/queries/0_stateless/01300_group_by_other_keys_having.reference
+++ b/tests/queries/0_stateless/01300_group_by_other_keys_having.reference
@@ -49,7 +49,7 @@ QUERY id: 0
                 LIST id: 9, nodes: 1
                   COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32
@@ -124,7 +124,7 @@ QUERY id: 0
                 LIST id: 9, nodes: 1
                   COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32
@@ -194,7 +194,7 @@ QUERY id: 0
                   COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
                   CONSTANT id: 11, constant_value: UInt64_5, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 7, table_function_name: numbers
+    TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32
@@ -276,7 +276,7 @@ QUERY id: 0
                 LIST id: 9, nodes: 1
                   COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32
diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference
index bf184d142ec..d47f12ff4d1 100644
--- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference
+++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference
@@ -49,14 +49,14 @@ QUERY id: 0
           LIST id: 3, nodes: 1
             COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5
   JOIN TREE
-    QUERY id: 5, is_subquery: 1
+    QUERY id: 5, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         x UInt64
       PROJECTION
         LIST id: 6, nodes: 1
           COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8
       JOIN TREE
-        TABLE_FUNCTION id: 8, table_function_name: numbers
+        TABLE_FUNCTION id: 8, alias: __table2, table_function_name: numbers
           ARGUMENTS
             LIST id: 9, nodes: 1
               CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
@@ -83,14 +83,14 @@ QUERY id: 0
           LIST id: 3, nodes: 1
             COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5
   JOIN TREE
-    QUERY id: 5, is_subquery: 1
+    QUERY id: 5, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         x UInt64
       PROJECTION
         LIST id: 6, nodes: 1
           COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8
       JOIN TREE
-        TABLE_FUNCTION id: 8, table_function_name: numbers
+        TABLE_FUNCTION id: 8, alias: __table2, table_function_name: numbers
           ARGUMENTS
             LIST id: 9, nodes: 1
               CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
@@ -119,14 +119,14 @@ QUERY id: 0
           LIST id: 3, nodes: 1
             COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5
   JOIN TREE
-    QUERY id: 5, is_subquery: 1
+    QUERY id: 5, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         x UInt64
       PROJECTION
         LIST id: 6, nodes: 1
           COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8
       JOIN TREE
-        TABLE_FUNCTION id: 8, table_function_name: numbers
+        TABLE_FUNCTION id: 8, alias: __table2, table_function_name: numbers
           ARGUMENTS
             LIST id: 9, nodes: 1
               CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
@@ -171,7 +171,7 @@ QUERY id: 0
   JOIN TREE
     JOIN id: 8, strictness: ALL, kind: FULL
       LEFT TABLE EXPRESSION
-        QUERY id: 3, alias: s, is_subquery: 1
+        QUERY id: 3, alias: __table1, is_subquery: 1
           PROJECTION COLUMNS
             key UInt64
           PROJECTION
@@ -182,12 +182,12 @@ QUERY id: 0
                     COLUMN id: 12, column_name: number, result_type: UInt64, source_id: 13
                     CONSTANT id: 14, constant_value: UInt64_2, constant_value_type: UInt8
           JOIN TREE
-            TABLE_FUNCTION id: 13, table_function_name: numbers
+            TABLE_FUNCTION id: 13, alias: __table2, table_function_name: numbers
               ARGUMENTS
                 LIST id: 15, nodes: 1
                   CONSTANT id: 16, constant_value: UInt64_4, constant_value_type: UInt8
       RIGHT TABLE EXPRESSION
-        TABLE id: 5, alias: t, table_name: default.test
+        TABLE id: 5, alias: __table3, table_name: default.test
       JOIN EXPRESSION
         LIST id: 17, nodes: 1
           COLUMN id: 18, column_name: key, result_type: UInt64, source_id: 8
@@ -220,7 +220,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3
       COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.test
+    TABLE id: 3, alias: __table1, table_name: default.test
   ORDER BY
     LIST id: 5, nodes: 2
       SORT id: 6, sort_direction: ASCENDING, with_fill: 0
@@ -246,7 +246,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3
       COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.test
+    TABLE id: 3, alias: __table1, table_name: default.test
   ORDER BY
     LIST id: 5, nodes: 2
       SORT id: 6, sort_direction: ASCENDING, with_fill: 0
@@ -270,7 +270,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.test
+    TABLE id: 3, alias: __table1, table_name: default.test
   GROUP BY
     LIST id: 4, nodes: 1
       COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3
@@ -297,9 +297,9 @@ QUERY id: 0
   JOIN TREE
     JOIN id: 6, strictness: ALL, kind: INNER
       LEFT TABLE EXPRESSION
-        TABLE id: 3, table_name: default.t1
+        TABLE id: 3, alias: __table1, table_name: default.t1
       RIGHT TABLE EXPRESSION
-        TABLE id: 5, table_name: default.t2
+        TABLE id: 5, alias: __table2, table_name: default.t2
       JOIN EXPRESSION
         FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8
           ARGUMENTS
diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference
index a6d43856aec..2920b387aa2 100644
--- a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference
+++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference
@@ -1,9 +1,9 @@
 ===http===
 {"query":"select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null\n","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1}
 {"query":"DESC TABLE system.one","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1}
-{"query":"SELECT 1 AS `1` FROM `system`.`one`","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1}
+{"query":"SELECT 1 AS `1` FROM `system`.`one` AS `__table1`","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1}
 {"query":"DESC TABLE system.one","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1}
-{"query":"SELECT 1 AS `1` FROM `system`.`one`","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1}
+{"query":"SELECT 1 AS `1` FROM `system`.`one` AS `__table1`","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1}
 {"query":"select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null\n","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1}
 {"total spans":"3","unique spans":"3","unique non-zero parent spans":"3"}
 {"initial query spans with proper parent":"2"}
diff --git a/tests/queries/0_stateless/01622_constraints_where_optimization.reference b/tests/queries/0_stateless/01622_constraints_where_optimization.reference
index b5520d75b0e..3f6e8211f1a 100644
--- a/tests/queries/0_stateless/01622_constraints_where_optimization.reference
+++ b/tests/queries/0_stateless/01622_constraints_where_optimization.reference
@@ -8,7 +8,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.t_constraints_where
+    TABLE id: 3, alias: __table1, table_name: default.t_constraints_where
   WHERE
     CONSTANT id: 4, constant_value: UInt64_0, constant_value_type: UInt8
   SETTINGS allow_experimental_analyzer=1
@@ -22,7 +22,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.t_constraints_where
+    TABLE id: 3, alias: __table1, table_name: default.t_constraints_where
   WHERE
     CONSTANT id: 4, constant_value: UInt64_0, constant_value_type: UInt8
   SETTINGS allow_experimental_analyzer=1
@@ -36,7 +36,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.t_constraints_where
+    TABLE id: 3, alias: __table1, table_name: default.t_constraints_where
   WHERE
     CONSTANT id: 4, constant_value: UInt64_0, constant_value_type: UInt8
   SETTINGS allow_experimental_analyzer=1
@@ -50,7 +50,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.t_constraints_where
+    TABLE id: 3, alias: __table1, table_name: default.t_constraints_where
   WHERE
     FUNCTION id: 4, function_name: less, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -68,7 +68,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.t_constraints_where
+    TABLE id: 3, alias: __table1, table_name: default.t_constraints_where
   PREWHERE
     FUNCTION id: 4, function_name: less, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -85,5 +85,5 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.t_constraints_where
+    TABLE id: 3, alias: __table1, table_name: default.t_constraints_where
   SETTINGS allow_experimental_analyzer=1
diff --git a/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference b/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference
index 8a33df9fad2..7c2753124b3 100644
--- a/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference
+++ b/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference
@@ -7,19 +7,19 @@ Partial sorting plan
     Prefix sort description: n ASC
     Result sort description: n ASC, x ASC
   optimize_read_in_window_order=1, allow_experimental_analyzer=1
-    Prefix sort description: n_0 ASC
-    Result sort description: n_0 ASC, x_1 ASC
+    Prefix sort description: __table1.n ASC
+    Result sort description: __table1.n ASC, __table1.x ASC
 No sorting plan
   optimize_read_in_window_order=0
     Sort description: n ASC, x ASC
   optimize_read_in_window_order=0, allow_experimental_analyzer=1
-    Sort description: n_0 ASC, x_1 ASC
+    Sort description: __table1.n ASC, __table1.x ASC
   optimize_read_in_window_order=1
     Prefix sort description: n ASC, x ASC
     Result sort description: n ASC, x ASC
   optimize_read_in_window_order=1, allow_experimental_analyzer=1
-    Prefix sort description: n_0 ASC, x_1 ASC
-    Result sort description: n_0 ASC, x_1 ASC
+    Prefix sort description: __table1.n ASC, __table1.x ASC
+    Result sort description: __table1.n ASC, __table1.x ASC
 Complex ORDER BY
   optimize_read_in_window_order=0
 3	3	1
diff --git a/tests/queries/0_stateless/02366_explain_query_tree.reference b/tests/queries/0_stateless/02366_explain_query_tree.reference
index 769d7661e68..acbedbd0622 100644
--- a/tests/queries/0_stateless/02366_explain_query_tree.reference
+++ b/tests/queries/0_stateless/02366_explain_query_tree.reference
@@ -22,7 +22,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3
       COLUMN id: 4, column_name: value, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.test_table
+    TABLE id: 3, alias: __table1, table_name: default.test_table
 --
 QUERY id: 0
   PROJECTION
@@ -64,7 +64,7 @@ QUERY id: 0
                       CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8
             CONSTANT id: 10, constant_value: Array_[UInt64_1, UInt64_2, UInt64_3], constant_value_type: Array(UInt8)
   JOIN TREE
-    TABLE id: 11, table_name: default.test_table
+    TABLE id: 11, alias: __table1, table_name: default.test_table
 --
 QUERY id: 0
   WITH
@@ -99,4 +99,4 @@ QUERY id: 0
             COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5
             CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 5, table_name: default.test_table
+    TABLE id: 5, alias: __table1, table_name: default.test_table
diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference
index dd5c9d4616e..5dd39c39852 100644
--- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference
+++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference
@@ -2,51 +2,51 @@ Expression
 Header: key String
         value String
   Join
-  Header: key_0 String
-          value_1 String
+  Header: __table1.key String
+          __table3.value String
     Expression
-    Header: key_0 String
+    Header: __table1.key String
       ReadFromStorage
       Header: dummy UInt8
     Union
-    Header: key_2 String
-            value_1 String
+    Header: __table3.key String
+            __table3.value String
       Expression
-      Header: key_2 String
-              value_1 String
+      Header: __table3.key String
+              __table3.value String
         ReadFromStorage
         Header: dummy UInt8
       Expression
-      Header: key_2 String
-              value_1 String
+      Header: __table3.key String
+              __table3.value String
         ReadFromStorage
         Header: dummy UInt8
 Expression
 Header: key String
         value String
   Join
-  Header: key_0 String
-          key_2 String
-          value_1 String
+  Header: __table1.key String
+          __table3.key String
+          __table3.value String
     Sorting
-    Header: key_0 String
+    Header: __table1.key String
       Expression
-      Header: key_0 String
+      Header: __table1.key String
         ReadFromStorage
         Header: dummy UInt8
     Sorting
-    Header: key_2 String
-            value_1 String
+    Header: __table3.key String
+            __table3.value String
       Union
-      Header: key_2 String
-              value_1 String
+      Header: __table3.key String
+              __table3.value String
         Expression
-        Header: key_2 String
-                value_1 String
+        Header: __table3.key String
+                __table3.value String
           ReadFromStorage
           Header: dummy UInt8
         Expression
-        Header: key_2 String
-                value_1 String
+        Header: __table3.key String
+                __table3.value String
           ReadFromStorage
           Header: dummy UInt8
diff --git a/tests/queries/0_stateless/02421_explain_subquery.sql b/tests/queries/0_stateless/02421_explain_subquery.sql
index 4b970f81219..2970003cb1c 100644
--- a/tests/queries/0_stateless/02421_explain_subquery.sql
+++ b/tests/queries/0_stateless/02421_explain_subquery.sql
@@ -34,7 +34,7 @@ DROP TABLE t1;
 
 SET allow_experimental_analyzer = 1;
 
-SELECT count() > 3 FROM (EXPLAIN PIPELINE header = 1 SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain LIKE '%Header: number__ UInt64%';
+SELECT count() > 3 FROM (EXPLAIN PIPELINE header = 1 SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain LIKE '%Header: \_\_table1.number UInt64%';
 SELECT count() > 0 FROM (EXPLAIN PLAN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%';
 SELECT count() > 0 FROM (EXPLAIN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%';
 SELECT count() > 0 FROM (EXPLAIN CURRENT TRANSACTION);
diff --git a/tests/queries/0_stateless/02476_fuse_sum_count.reference b/tests/queries/0_stateless/02476_fuse_sum_count.reference
index 43a39e8b7e5..1eb156743b0 100644
--- a/tests/queries/0_stateless/02476_fuse_sum_count.reference
+++ b/tests/queries/0_stateless/02476_fuse_sum_count.reference
@@ -21,7 +21,7 @@ QUERY id: 0
           LIST id: 7, nodes: 1
             COLUMN id: 4, column_name: a, result_type: Nullable(Int8), source_id: 5
   JOIN TREE
-    TABLE id: 5, table_name: default.fuse_tbl
+    TABLE id: 5, alias: __table1, table_name: default.fuse_tbl
 QUERY id: 0
   PROJECTION COLUMNS
     sum(b) Int64
@@ -59,7 +59,7 @@ QUERY id: 0
                               COLUMN id: 6, column_name: b, result_type: Int8, source_id: 7
                         CONSTANT id: 18, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 7, table_name: default.fuse_tbl
+    TABLE id: 7, alias: __table1, table_name: default.fuse_tbl
 QUERY id: 0
   PROJECTION COLUMNS
     sum(plus(a, 1)) Nullable(Int64)
@@ -138,7 +138,7 @@ QUERY id: 0
           LIST id: 39, nodes: 1
             COLUMN id: 6, column_name: a, result_type: Nullable(Int8), source_id: 7
   JOIN TREE
-    TABLE id: 7, table_name: default.fuse_tbl
+    TABLE id: 7, alias: __table1, table_name: default.fuse_tbl
 QUERY id: 0
   PROJECTION COLUMNS
     multiply(avg(b), 3) Float64
@@ -215,14 +215,14 @@ QUERY id: 0
                         COLUMN id: 10, column_name: b, result_type: Int8, source_id: 11
                   CONSTANT id: 37, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    QUERY id: 11, is_subquery: 1
+    QUERY id: 11, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         b Int8
       PROJECTION
         LIST id: 38, nodes: 1
           COLUMN id: 39, column_name: b, result_type: Int8, source_id: 40
       JOIN TREE
-        TABLE id: 40, table_name: default.fuse_tbl
+        TABLE id: 40, alias: __table2, table_name: default.fuse_tbl
 QUERY id: 0
   PROJECTION COLUMNS
     sum(b) Int64
@@ -246,14 +246,14 @@ QUERY id: 0
                   COLUMN id: 6, column_name: b, result_type: Int64, source_id: 7
             CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    QUERY id: 7, is_subquery: 1
+    QUERY id: 7, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         b Int64
       PROJECTION
         LIST id: 12, nodes: 1
           COLUMN id: 13, column_name: x, result_type: Int64, source_id: 14
       JOIN TREE
-        QUERY id: 14, is_subquery: 1
+        QUERY id: 14, alias: __table2, is_subquery: 1
           PROJECTION COLUMNS
             x Int64
             count(b) UInt64
@@ -276,7 +276,7 @@ QUERY id: 0
                           COLUMN id: 20, column_name: b, result_type: Int8, source_id: 21
                     CONSTANT id: 25, constant_value: UInt64_2, constant_value_type: UInt8
           JOIN TREE
-            TABLE id: 21, table_name: default.fuse_tbl
+            TABLE id: 21, alias: __table3, table_name: default.fuse_tbl
 0	0	nan
 0	0	nan
 45	10	4.5	Decimal(38, 0)	UInt64	Float64
diff --git a/tests/queries/0_stateless/02477_fuse_quantiles.reference b/tests/queries/0_stateless/02477_fuse_quantiles.reference
index 7c7d581f7fb..7603381416c 100644
--- a/tests/queries/0_stateless/02477_fuse_quantiles.reference
+++ b/tests/queries/0_stateless/02477_fuse_quantiles.reference
@@ -34,7 +34,7 @@ QUERY id: 0
                   COLUMN id: 9, column_name: b, result_type: Float64, source_id: 10
             CONSTANT id: 14, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    QUERY id: 10, is_subquery: 1
+    QUERY id: 10, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         b Float64
       PROJECTION
@@ -45,7 +45,7 @@ QUERY id: 0
                 COLUMN id: 18, column_name: x, result_type: Float64, source_id: 19
                 CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
       JOIN TREE
-        QUERY id: 19, is_subquery: 1
+        QUERY id: 19, alias: __table2, is_subquery: 1
           PROJECTION COLUMNS
             x Float64
             quantile(0.9)(b) Float64
@@ -76,7 +76,7 @@ QUERY id: 0
                           COLUMN id: 29, column_name: b, result_type: Int32, source_id: 30
                     CONSTANT id: 34, constant_value: UInt64_2, constant_value_type: UInt8
           JOIN TREE
-            TABLE id: 30, table_name: default.fuse_tbl
+            TABLE id: 30, alias: __table3, table_name: default.fuse_tbl
       GROUP BY
         LIST id: 35, nodes: 1
           COLUMN id: 18, column_name: x, result_type: Float64, source_id: 19
diff --git a/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference
index b11f3e3a1d3..969ec320790 100644
--- a/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference
+++ b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference
@@ -6,5 +6,5 @@
   Order: a ASC, c ASC
       ReadFromMergeTree (default.tab)
   Aggregating
-  Order: a_0 ASC, c_2 ASC
+  Order: __table1.a ASC, __table1.c ASC
         ReadFromMergeTree (default.tab)
diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_aggregation_arithmetics.reference b/tests/queries/0_stateless/02481_analyzer_optimize_aggregation_arithmetics.reference
index 22dda253066..a26773baae2 100644
--- a/tests/queries/0_stateless/02481_analyzer_optimize_aggregation_arithmetics.reference
+++ b/tests/queries/0_stateless/02481_analyzer_optimize_aggregation_arithmetics.reference
@@ -20,7 +20,7 @@ QUERY id: 0
                 LIST id: 9, nodes: 1
                   COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8
@@ -44,7 +44,7 @@ QUERY id: 0
                     LIST id: 10, nodes: 1
                       CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 7, table_function_name: numbers
+    TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8
diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference
index 03722034708..9f9c1da5e88 100644
--- a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference
+++ b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference
@@ -17,7 +17,7 @@ QUERY id: 0, group_by_type: grouping_sets
                 LIST id: 9, nodes: 1
                   COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32
@@ -103,7 +103,7 @@ QUERY id: 0, group_by_type: grouping_sets
                 LIST id: 9, nodes: 1
                   COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32
@@ -180,7 +180,7 @@ QUERY id: 0, group_by_type: grouping_sets
                 LIST id: 9, nodes: 1
                   COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32
@@ -253,7 +253,7 @@ QUERY id: 0, group_by_type: grouping_sets
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE_FUNCTION id: 3, table_function_name: numbers
+    TABLE_FUNCTION id: 3, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 4, nodes: 1
           CONSTANT id: 5, constant_value: UInt64_1000, constant_value_type: UInt16
diff --git a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference
index c42b9ce0cc4..0a63039202e 100644
--- a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference
+++ b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference
@@ -13,7 +13,7 @@ QUERY id: 0
                     LIST id: 6, nodes: 1
                       CONSTANT id: 7, constant_value: \'\', constant_value_type: String
   JOIN TREE
-    TABLE_FUNCTION id: 8, table_function_name: numbers
+    TABLE_FUNCTION id: 8, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 9, nodes: 1
           CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
index 88f23334d31..d77fd1028f2 100644
--- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
+++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
@@ -35,7 +35,7 @@ QUERY id: 0
                         CONSTANT id: 15, constant_value: \'other\', constant_value_type: String
                         CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String
   JOIN TREE
-    TABLE id: 7, table_name: system.numbers
+    TABLE id: 7, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt64
 google
@@ -78,7 +78,7 @@ QUERY id: 0
                         CONSTANT id: 17, constant_value: \'google\', constant_value_type: String
                         CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String
   JOIN TREE
-    TABLE id: 9, table_name: system.numbers
+    TABLE id: 9, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt64
 other1
@@ -122,7 +122,7 @@ QUERY id: 0
                               CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String
             CONSTANT id: 19, constant_value: \'1\', constant_value_type: String
   JOIN TREE
-    TABLE id: 9, table_name: system.numbers
+    TABLE id: 9, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt64
 google1
@@ -169,7 +169,7 @@ QUERY id: 0
                               CONSTANT id: 20, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String
             CONSTANT id: 21, constant_value: \'1\', constant_value_type: String
   JOIN TREE
-    TABLE id: 11, table_name: system.numbers
+    TABLE id: 11, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt64
 google
@@ -196,7 +196,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value, result_type: String, source_id: 3
   JOIN TREE
-    QUERY id: 3, alias: t1, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         value String
       PROJECTION
@@ -223,7 +223,7 @@ QUERY id: 0
                             CONSTANT id: 20, constant_value: \'google\', constant_value_type: String
                             CONSTANT id: 21, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String
       JOIN TREE
-        TABLE id: 12, table_name: system.numbers
+        TABLE id: 12, alias: __table2, table_name: system.numbers
       LIMIT
         CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt64
 other
@@ -250,7 +250,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value, result_type: String, source_id: 3
   JOIN TREE
-    QUERY id: 3, alias: t1, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         value String
       PROJECTION
@@ -274,7 +274,7 @@ QUERY id: 0
                             CONSTANT id: 18, constant_value: \'other\', constant_value_type: String
                             CONSTANT id: 19, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String
       JOIN TREE
-        TABLE id: 10, table_name: system.numbers
+        TABLE id: 10, alias: __table2, table_name: system.numbers
       LIMIT
         CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt64
 google	google
@@ -341,7 +341,7 @@ QUERY id: 0
                         CONSTANT id: 17, constant_value: \'google\', constant_value_type: String
                         CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String
   JOIN TREE
-    TABLE id: 9, table_name: system.numbers
+    TABLE id: 9, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt64
 other	other
@@ -402,7 +402,7 @@ QUERY id: 0
                         CONSTANT id: 15, constant_value: \'other\', constant_value_type: String
                         CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String
   JOIN TREE
-    TABLE id: 7, table_name: system.numbers
+    TABLE id: 7, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt64
 other
@@ -446,14 +446,14 @@ QUERY id: 0
                         CONSTANT id: 15, constant_value: \'other\', constant_value_type: String
                         CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String
   JOIN TREE
-    QUERY id: 7, is_subquery: 1
+    QUERY id: 7, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         number Nullable(Nothing)
       PROJECTION
         LIST id: 17, nodes: 1
           CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing)
       JOIN TREE
-        TABLE id: 19, table_name: system.numbers
+        TABLE id: 19, alias: __table2, table_name: system.numbers
       LIMIT
         CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt64
 other
@@ -482,7 +482,7 @@ QUERY id: 0
             CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String)
             CONSTANT id: 8, constant_value: \'other\', constant_value_type: String
   JOIN TREE
-    TABLE id: 5, table_name: system.numbers
+    TABLE id: 5, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt64
 google
@@ -514,6 +514,6 @@ QUERY id: 0
             CONSTANT id: 9, constant_value: \'censor.net\', constant_value_type: String
             CONSTANT id: 10, constant_value: \'google\', constant_value_type: String
   JOIN TREE
-    TABLE id: 7, table_name: system.numbers
+    TABLE id: 7, alias: __table1, table_name: system.numbers
   LIMIT
     CONSTANT id: 11, constant_value: UInt64_10, constant_value_type: UInt64
diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
index 1b177b84afa..7e327a863cf 100644
--- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
+++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
@@ -6,43 +6,43 @@ SELECT count() FROM a JOIN b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN d ON d.d1
 Expression ((Project names + Projection))
 Header: count() UInt64
   Aggregating
-  Header: a2_4 String
+  Header: __table1.a2 String
           count() UInt64
     Expression ((Before GROUP BY + DROP unused columns after JOIN))
-    Header: a2_4 String
+    Header: __table1.a2 String
       Join (JOIN FillRightFirst)
-      Header: a2_4 String
-              c1_2 UInt64
+      Header: __table1.a2 String
+              __table3.c1 UInt64
         Expression ((JOIN actions + DROP unused columns after JOIN))
-        Header: a2_4 String
-                c1_2 UInt64
+        Header: __table1.a2 String
+                __table3.c1 UInt64
           Join (JOIN FillRightFirst)
-          Header: a2_4 String
-                  b1_0 UInt64
-                  c1_2 UInt64
+          Header: __table1.a2 String
+                  __table2.b1 UInt64
+                  __table3.c1 UInt64
             Expression ((JOIN actions + DROP unused columns after JOIN))
-            Header: a2_4 String
-                    b1_0 UInt64
+            Header: __table1.a2 String
+                    __table2.b1 UInt64
               Join (JOIN FillRightFirst)
-              Header: a1_1 UInt64
-                      a2_4 String
-                      b1_0 UInt64
+              Header: __table1.a1 UInt64
+                      __table1.a2 String
+                      __table2.b1 UInt64
                 Expression ((JOIN actions + Change column names to column identifiers))
-                Header: a1_1 UInt64
-                        a2_4 String
+                Header: __table1.a1 UInt64
+                        __table1.a2 String
                   ReadFromMemoryStorage
                   Header: a1 UInt64
                           a2 String
                 Expression ((JOIN actions + Change column names to column identifiers))
-                Header: b1_0 UInt64
+                Header: __table2.b1 UInt64
                   ReadFromMemoryStorage
                   Header: b1 UInt64
             Expression ((JOIN actions + Change column names to column identifiers))
-            Header: c1_2 UInt64
+            Header: __table3.c1 UInt64
               ReadFromMemoryStorage
               Header: c1 UInt64
         Expression ((JOIN actions + Change column names to column identifiers))
-        Header: d1_3 UInt64
+        Header: __table4.d1 UInt64
           ReadFromMemoryStorage
           Header: d1 UInt64
 EXPLAIN PLAN header = 1
@@ -52,38 +52,38 @@ Expression ((Project names + (Projection + DROP unused columns after JOIN)))
 Header: a2 String
         d2 String
   Join (JOIN FillRightFirst)
-  Header: a2_0 String
-          k_2 UInt64
-          d2_1 String
+  Header: __table1.a2 String
+          __table1.k UInt64
+          __table4.d2 String
     Expression (DROP unused columns after JOIN)
-    Header: a2_0 String
-            k_2 UInt64
+    Header: __table1.a2 String
+            __table1.k UInt64
       Join (JOIN FillRightFirst)
-      Header: a2_0 String
-              k_2 UInt64
+      Header: __table1.a2 String
+              __table1.k UInt64
         Expression (DROP unused columns after JOIN)
-        Header: a2_0 String
-                k_2 UInt64
+        Header: __table1.a2 String
+                __table1.k UInt64
           Join (JOIN FillRightFirst)
-          Header: a2_0 String
-                  k_2 UInt64
+          Header: __table1.a2 String
+                  __table1.k UInt64
             Expression (Change column names to column identifiers)
-            Header: a2_0 String
-                    k_2 UInt64
+            Header: __table1.a2 String
+                    __table1.k UInt64
               ReadFromMemoryStorage
               Header: a2 String
                       k UInt64
             Expression (Change column names to column identifiers)
-            Header: k_3 UInt64
+            Header: __table2.k UInt64
               ReadFromMemoryStorage
               Header: k UInt64
         Expression (Change column names to column identifiers)
-        Header: k_4 UInt64
+        Header: __table3.k UInt64
           ReadFromMemoryStorage
           Header: k UInt64
     Expression (Change column names to column identifiers)
-    Header: d2_1 String
-            k_5 UInt64
+    Header: __table4.d2 String
+            __table4.k UInt64
       ReadFromMemoryStorage
       Header: d2 String
               k UInt64
@@ -97,55 +97,55 @@ WHERE c.c2 != '' ORDER BY a.a2
 Expression (Project names)
 Header: bx String
   Sorting (Sorting for ORDER BY)
-  Header: a2_6 String
-          bx_0 String
+  Header: __table1.a2 String
+          __table2.bx String
     Expression ((Before ORDER BY + (Projection + )))
-    Header: a2_6 String
-            bx_0 String
+    Header: __table1.a2 String
+            __table2.bx String
       Join (JOIN FillRightFirst)
-      Header: a2_6 String
-              bx_0 String
-              c2_5 String
-              c1_3 UInt64
+      Header: __table1.a2 String
+              __table2.bx String
+              __table4.c2 String
+              __table4.c1 UInt64
         Expression
-        Header: a2_6 String
-                bx_0 String
-                c2_5 String
-                c1_3 UInt64
+        Header: __table1.a2 String
+                __table2.bx String
+                __table4.c2 String
+                __table4.c1 UInt64
           Join (JOIN FillRightFirst)
-          Header: a2_6 String
-                  bx_0 String
-                  b1_1 UInt64
-                  c2_5 String
-                  c1_3 UInt64
+          Header: __table1.a2 String
+                  __table2.bx String
+                  __table2.b1 UInt64
+                  __table4.c2 String
+                  __table4.c1 UInt64
             Expression ((JOIN actions + DROP unused columns after JOIN))
-            Header: a2_6 String
-                    bx_0 String
-                    b1_1 UInt64
+            Header: __table1.a2 String
+                    __table2.bx String
+                    __table2.b1 UInt64
               Join (JOIN FillRightFirst)
-              Header: a1_2 UInt64
-                      a2_6 String
-                      bx_0 String
-                      b1_1 UInt64
+              Header: __table1.a1 UInt64
+                      __table1.a2 String
+                      __table2.bx String
+                      __table2.b1 UInt64
                 Expression ((JOIN actions + Change column names to column identifiers))
-                Header: a1_2 UInt64
-                        a2_6 String
+                Header: __table1.a1 UInt64
+                        __table1.a2 String
                   ReadFromMemoryStorage
                   Header: a1 UInt64
                           a2 String
                 Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
-                Header: b1_1 UInt64
-                        bx_0 String
+                Header: __table2.b1 UInt64
+                        __table2.bx String
                   ReadFromMemoryStorage
                   Header: b1 UInt64
                           b2 String
             Filter (( + (JOIN actions + Change column names to column identifiers)))
-            Header: c1_3 UInt64
-                    c2_5 String
+            Header: __table4.c1 UInt64
+                    __table4.c2 String
               ReadFromMemoryStorage
               Header: c1 UInt64
                       c2 String
         Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
-        Header: d1_4 UInt64
+        Header: __table5.d1 UInt64
           ReadFromStorage (SystemNumbers)
           Header: number UInt64
diff --git a/tests/queries/0_stateless/02518_rewrite_aggregate_function_with_if.reference b/tests/queries/0_stateless/02518_rewrite_aggregate_function_with_if.reference
index 37680adf8e0..15543789c1d 100644
--- a/tests/queries/0_stateless/02518_rewrite_aggregate_function_with_if.reference
+++ b/tests/queries/0_stateless/02518_rewrite_aggregate_function_with_if.reference
@@ -17,7 +17,7 @@ QUERY id: 0
                   COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
                   CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8
@@ -40,7 +40,7 @@ QUERY id: 0
                   CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8
                   COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8
@@ -63,7 +63,7 @@ QUERY id: 0
                   COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
                   CONSTANT id: 11, constant_value: NULL, constant_value_type: Nullable(Nothing)
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8
@@ -86,7 +86,7 @@ QUERY id: 0
                   CONSTANT id: 11, constant_value: NULL, constant_value_type: Nullable(Nothing)
                   COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8
@@ -109,7 +109,7 @@ QUERY id: 0
                   COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
                   CONSTANT id: 11, constant_value: NULL, constant_value_type: Nullable(Nothing)
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8
@@ -132,7 +132,7 @@ QUERY id: 0
                   CONSTANT id: 11, constant_value: NULL, constant_value_type: Nullable(Nothing)
                   COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8
@@ -160,7 +160,7 @@ QUERY id: 0
                   COLUMN id: 12, column_name: number, result_type: UInt64, source_id: 13
                   CONSTANT id: 15, constant_value: NULL, constant_value_type: Nullable(Nothing)
   JOIN TREE
-    TABLE_FUNCTION id: 13, table_function_name: numbers
+    TABLE_FUNCTION id: 13, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 16, nodes: 1
           CONSTANT id: 17, constant_value: UInt64_100, constant_value_type: UInt8
@@ -188,7 +188,7 @@ QUERY id: 0
                   CONSTANT id: 15, constant_value: NULL, constant_value_type: Nullable(Nothing)
                   COLUMN id: 12, column_name: number, result_type: UInt64, source_id: 13
   JOIN TREE
-    TABLE_FUNCTION id: 13, table_function_name: numbers
+    TABLE_FUNCTION id: 13, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 16, nodes: 1
           CONSTANT id: 17, constant_value: UInt64_100, constant_value_type: UInt8
@@ -207,7 +207,7 @@ QUERY id: 0
                   COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
                   CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 5, table_function_name: numbers
+    TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 9, nodes: 1
           CONSTANT id: 10, constant_value: UInt64_100, constant_value_type: UInt8
@@ -229,7 +229,7 @@ QUERY id: 0
                         COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
                         CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 5, table_function_name: numbers
+    TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 11, nodes: 1
           CONSTANT id: 12, constant_value: UInt64_100, constant_value_type: UInt8
@@ -248,7 +248,7 @@ QUERY id: 0
                   COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
                   CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 5, table_function_name: numbers
+    TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 9, nodes: 1
           CONSTANT id: 10, constant_value: UInt64_100, constant_value_type: UInt8
@@ -270,7 +270,7 @@ QUERY id: 0
                         COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
                         CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 5, table_function_name: numbers
+    TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 11, nodes: 1
           CONSTANT id: 12, constant_value: UInt64_100, constant_value_type: UInt8
@@ -289,7 +289,7 @@ QUERY id: 0
                   COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
                   CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 5, table_function_name: numbers
+    TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 9, nodes: 1
           CONSTANT id: 10, constant_value: UInt64_100, constant_value_type: UInt8
@@ -311,7 +311,7 @@ QUERY id: 0
                         COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
                         CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 5, table_function_name: numbers
+    TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 11, nodes: 1
           CONSTANT id: 12, constant_value: UInt64_100, constant_value_type: UInt8
@@ -335,7 +335,7 @@ QUERY id: 0
                   COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
                   CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 13, nodes: 1
           CONSTANT id: 14, constant_value: UInt64_100, constant_value_type: UInt8
@@ -362,7 +362,7 @@ QUERY id: 0
                         COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
                         CONSTANT id: 14, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 15, nodes: 1
           CONSTANT id: 16, constant_value: UInt64_100, constant_value_type: UInt8
diff --git a/tests/queries/0_stateless/02534_analyzer_grouping_function.reference b/tests/queries/0_stateless/02534_analyzer_grouping_function.reference
index fcbf625ef22..1b496644547 100644
--- a/tests/queries/0_stateless/02534_analyzer_grouping_function.reference
+++ b/tests/queries/0_stateless/02534_analyzer_grouping_function.reference
@@ -16,7 +16,7 @@ QUERY id: 0
           LIST id: 7, nodes: 1
             COLUMN id: 8, column_name: value, result_type: String, source_id: 5
   JOIN TREE
-    TABLE id: 5, table_name: default.test_table
+    TABLE id: 5, alias: __table1, table_name: default.test_table
   GROUP BY
     LIST id: 9, nodes: 2
       COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5
@@ -42,7 +42,7 @@ QUERY id: 0, group_by_type: rollup
             COLUMN id: 9, column_name: __grouping_set, result_type: UInt64
             COLUMN id: 10, column_name: value, result_type: String, source_id: 6
   JOIN TREE
-    TABLE id: 6, table_name: default.test_table
+    TABLE id: 6, alias: __table1, table_name: default.test_table
   GROUP BY
     LIST id: 11, nodes: 2
       COLUMN id: 5, column_name: id, result_type: UInt64, source_id: 6
@@ -70,7 +70,7 @@ QUERY id: 0, group_by_type: cube
             COLUMN id: 9, column_name: __grouping_set, result_type: UInt64
             COLUMN id: 10, column_name: value, result_type: String, source_id: 6
   JOIN TREE
-    TABLE id: 6, table_name: default.test_table
+    TABLE id: 6, alias: __table1, table_name: default.test_table
   GROUP BY
     LIST id: 11, nodes: 2
       COLUMN id: 5, column_name: id, result_type: UInt64, source_id: 6
@@ -99,7 +99,7 @@ QUERY id: 0, group_by_type: grouping_sets
             COLUMN id: 9, column_name: __grouping_set, result_type: UInt64
             COLUMN id: 10, column_name: value, result_type: String, source_id: 6
   JOIN TREE
-    TABLE id: 6, table_name: default.test_table
+    TABLE id: 6, alias: __table1, table_name: default.test_table
   GROUP BY
     LIST id: 11, nodes: 2
       LIST id: 12, nodes: 1
@@ -128,7 +128,7 @@ QUERY id: 0, group_by_type: grouping_sets
             COLUMN id: 9, column_name: __grouping_set, result_type: UInt64
             COLUMN id: 10, column_name: value, result_type: String, source_id: 6
   JOIN TREE
-    TABLE id: 6, table_name: default.test_table
+    TABLE id: 6, alias: __table1, table_name: default.test_table
   GROUP BY
     LIST id: 11, nodes: 2
       LIST id: 12, nodes: 1
diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference
index d083e178586..9692d60d945 100644
--- a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference
+++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference
@@ -9,7 +9,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02668_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02668_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -26,7 +26,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02668_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02668_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -42,7 +42,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02668_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02668_logical_optimizer
   WHERE
     CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8
 3	another
@@ -55,7 +55,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02668_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02668_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -80,7 +80,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02668_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02668_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
diff --git a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference
index eb79bbc842a..2b9d6b5d9d2 100644
--- a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference
+++ b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference
@@ -9,7 +9,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02702_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02702_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: or, function_type: ordinary, result_type: Nullable(UInt8)
       ARGUMENTS
@@ -41,7 +41,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02702_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02702_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: or, function_type: ordinary, result_type: Nullable(UInt8)
       ARGUMENTS
@@ -68,7 +68,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Nullable(Int32), source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02702_logical_optimizer_with_null_column
+    TABLE id: 3, alias: __table1, table_name: default.02702_logical_optimizer_with_null_column
   WHERE
     FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: Nullable(UInt8)
       ARGUMENTS
diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
index f688db940d9..2aecd2bcffd 100644
--- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
+++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
@@ -9,4 +9,4 @@
 7885388429666205427
 8124171311239967992
 1	1	-- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n    SETTINGS\n    allow_experimental_analyzer = 1,\n    max_parallel_replicas = 2,\n    cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n    allow_experimental_parallel_reading_from_replicas = 1,\n    use_hedged_requests = 0;
-0	2	SELECT `join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1, use_hedged_requests = 0
+0	2	SELECT `__table1`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` AS `__table1` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1, use_hedged_requests = 0
diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference
index 0fd2f694aeb..63658890119 100644
--- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference
+++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference
@@ -8,7 +8,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -50,7 +50,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -92,7 +92,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -126,7 +126,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -160,7 +160,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -194,7 +194,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -228,7 +228,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -270,7 +270,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -335,7 +335,7 @@ QUERY id: 0
           LIST id: 5, nodes: 1
             COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 7, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -377,7 +377,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -412,7 +412,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   PREWHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -452,7 +452,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -492,7 +492,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -529,7 +529,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -566,7 +566,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -608,7 +608,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -650,7 +650,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -692,7 +692,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -726,7 +726,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -760,7 +760,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -794,7 +794,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -828,7 +828,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date_t
+    TABLE id: 3, alias: __table1, table_name: default.date_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -878,7 +878,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.datetime_t
+    TABLE id: 3, alias: __table1, table_name: default.datetime_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -920,7 +920,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.datetime_t
+    TABLE id: 3, alias: __table1, table_name: default.datetime_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -962,7 +962,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date32_t
+    TABLE id: 3, alias: __table1, table_name: default.date32_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -1004,7 +1004,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.date32_t
+    TABLE id: 3, alias: __table1, table_name: default.date32_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -1046,7 +1046,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.datetime64_t
+    TABLE id: 3, alias: __table1, table_name: default.datetime64_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -1088,7 +1088,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value1, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.datetime64_t
+    TABLE id: 3, alias: __table1, table_name: default.datetime64_t
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS

From ab7b6fcdae3986fcf086f11c9c770bfd4f694d92 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Thu, 24 Aug 2023 22:32:40 +0200
Subject: [PATCH 005/204] Update tests and revert change for ARRAY JOIN

---
 src/Analyzer/createUniqueTableAliases.cpp     |   2 +
 .../00736_disjunction_optimisation.reference  |  22 ++--
 ..._constraints_simple_optimization.reference |   6 +-
 .../01623_constraints_column_swap.reference   |  26 ++---
 .../01646_rewrite_sum_if.reference            |   6 +-
 .../01655_plan_optimizations.reference        |  34 +++---
 .../0_stateless/01655_plan_optimizations.sh   |  22 ++--
 .../02226_analyzer_or_like_combine.reference  |   4 +-
 .../02227_union_match_by_name.reference       |  16 +--
 .../0_stateless/02303_query_kind.reference    |  16 +--
 ...ct_in_order_optimization_explain.reference |  42 ++++----
 ..._input_stream_properties_explain.reference |  40 +++----
 .../02451_order_by_monotonic.reference        |  32 +++---
 ...ssions_optimizer_low_cardinality.reference |   6 +-
 ...2493_analyzer_sum_if_to_count_if.reference |   6 +-
 ...2498_analyzer_settings_push_down.reference |  20 ++--
 .../02564_analyzer_cross_to_inner.reference   |  24 ++---
 ..._predicate_push_down_sorting_fix.reference |  14 +--
 ...2576_rewrite_array_exists_to_has.reference |   8 +-
 ...dicate_push_down_filled_join_fix.reference |  30 +++---
 ...n_merge_tree_prewhere_row_policy.reference |   8 +-
 .../02835_join_step_explain.reference         | 100 +++++++++---------
 22 files changed, 243 insertions(+), 241 deletions(-)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index d94ea250450..782f2f35749 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -36,6 +36,8 @@ public:
             case QueryTreeNodeType::TABLE:
                 [[fallthrough]];
             case QueryTreeNodeType::TABLE_FUNCTION:
+                [[fallthrough]];
+            case QueryTreeNodeType::ARRAY_JOIN:
             {
                 auto & alias = table_expression_to_alias[node];
                 if (alias.empty())
diff --git a/tests/queries/0_stateless/00736_disjunction_optimisation.reference b/tests/queries/0_stateless/00736_disjunction_optimisation.reference
index 84477a64057..f28dcacef0e 100644
--- a/tests/queries/0_stateless/00736_disjunction_optimisation.reference
+++ b/tests/queries/0_stateless/00736_disjunction_optimisation.reference
@@ -34,7 +34,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3
       COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.bug
+    TABLE id: 3, alias: __table1, table_name: default.bug
   WHERE
     FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -77,7 +77,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3
       COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         k UInt64
         s UInt64
@@ -86,7 +86,7 @@ QUERY id: 0
           COLUMN id: 6, column_name: k, result_type: UInt64, source_id: 7
           COLUMN id: 8, column_name: s, result_type: UInt64, source_id: 7
       JOIN TREE
-        TABLE id: 7, table_name: default.bug
+        TABLE id: 7, alias: __table2, table_name: default.bug
       WHERE
         FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8
           ARGUMENTS
@@ -151,7 +151,7 @@ QUERY id: 0
             COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3
             CONSTANT id: 16, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8)
   JOIN TREE
-    TABLE id: 3, table_name: default.bug
+    TABLE id: 3, alias: __table1, table_name: default.bug
   SETTINGS allow_experimental_analyzer=1
 21	1
 22	1
@@ -184,7 +184,7 @@ QUERY id: 0
             COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3
             CONSTANT id: 6, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8)
   JOIN TREE
-    TABLE id: 3, table_name: default.bug
+    TABLE id: 3, alias: __table1, table_name: default.bug
   SETTINGS allow_experimental_analyzer=1
 1	21
 1	22
@@ -222,7 +222,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3
       COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.bug
+    TABLE id: 3, alias: __table1, table_name: default.bug
   WHERE
     FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -265,7 +265,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3
       COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         k UInt64
         s UInt64
@@ -274,7 +274,7 @@ QUERY id: 0
           COLUMN id: 6, column_name: k, result_type: UInt64, source_id: 7
           COLUMN id: 8, column_name: s, result_type: UInt64, source_id: 7
       JOIN TREE
-        TABLE id: 7, table_name: default.bug
+        TABLE id: 7, alias: __table2, table_name: default.bug
       WHERE
         FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8
           ARGUMENTS
@@ -347,7 +347,7 @@ QUERY id: 0
             COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3
             CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8)
   JOIN TREE
-    TABLE id: 3, table_name: default.bug
+    TABLE id: 3, alias: __table1, table_name: default.bug
   SETTINGS allow_experimental_analyzer=1
 21	1
 22	1
@@ -380,7 +380,7 @@ QUERY id: 0
             COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3
             CONSTANT id: 6, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8)
   JOIN TREE
-    TABLE id: 3, table_name: default.bug
+    TABLE id: 3, alias: __table1, table_name: default.bug
   SETTINGS allow_experimental_analyzer=1
 21	1
 22	1
@@ -413,5 +413,5 @@ QUERY id: 0
             COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3
             CONSTANT id: 6, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8)
   JOIN TREE
-    TABLE id: 3, table_name: default.bug
+    TABLE id: 3, alias: __table1, table_name: default.bug
   SETTINGS allow_experimental_analyzer=1
diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.reference b/tests/queries/0_stateless/01622_constraints_simple_optimization.reference
index a375c35ca3e..e0b02c2f59c 100644
--- a/tests/queries/0_stateless/01622_constraints_simple_optimization.reference
+++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.reference
@@ -45,7 +45,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.constraint_test_constants
+    TABLE id: 3, alias: __table1, table_name: default.constraint_test_constants
   WHERE
     FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -63,7 +63,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.constraint_test_constants
+    TABLE id: 3, alias: __table1, table_name: default.constraint_test_constants
   WHERE
     FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -80,5 +80,5 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    TABLE id: 3, table_name: default.constraint_test_constants
+    TABLE id: 3, alias: __table1, table_name: default.constraint_test_constants
   SETTINGS allow_experimental_analyzer=1
diff --git a/tests/queries/0_stateless/01623_constraints_column_swap.reference b/tests/queries/0_stateless/01623_constraints_column_swap.reference
index 3639ad47228..555a4c93f70 100644
--- a/tests/queries/0_stateless/01623_constraints_column_swap.reference
+++ b/tests/queries/0_stateless/01623_constraints_column_swap.reference
@@ -20,7 +20,7 @@ QUERY id: 0
             COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5
             CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 5, table_name: default.column_swap_test_test
+    TABLE id: 5, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -50,7 +50,7 @@ QUERY id: 0
             COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5
             CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 5, table_name: default.column_swap_test_test
+    TABLE id: 5, alias: __table1, table_name: default.column_swap_test_test
   PREWHERE
     FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -80,7 +80,7 @@ QUERY id: 0
             COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5
             CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 5, table_name: default.column_swap_test_test
+    TABLE id: 5, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -110,7 +110,7 @@ QUERY id: 0
             COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5
             CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 5, table_name: default.column_swap_test_test
+    TABLE id: 5, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -140,7 +140,7 @@ QUERY id: 0
             COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5
             CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 5, table_name: default.column_swap_test_test
+    TABLE id: 5, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -162,7 +162,7 @@ QUERY id: 0
             COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 5
             CONSTANT id: 6, constant_value: UInt64_10, constant_value_type: UInt8
   JOIN TREE
-    TABLE id: 5, table_name: default.column_swap_test_test
+    TABLE id: 5, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -191,7 +191,7 @@ QUERY id: 0
             CONSTANT id: 8, constant_value: UInt64_10, constant_value_type: UInt8
       COLUMN id: 9, column_name: a, result_type: String, source_id: 7
   JOIN TREE
-    TABLE id: 7, table_name: default.column_swap_test_test
+    TABLE id: 7, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -223,7 +223,7 @@ QUERY id: 0
             CONSTANT id: 8, constant_value: UInt64_10, constant_value_type: UInt8
       COLUMN id: 9, column_name: a, result_type: String, source_id: 7
   JOIN TREE
-    TABLE id: 7, table_name: default.column_swap_test_test
+    TABLE id: 7, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -248,7 +248,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: String, source_id: 3
       COLUMN id: 4, column_name: a, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.column_swap_test_test
+    TABLE id: 3, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -270,7 +270,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: String, source_id: 3
       COLUMN id: 4, column_name: a, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.column_swap_test_test
+    TABLE id: 3, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -292,7 +292,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: String, source_id: 3
       COLUMN id: 4, column_name: a, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.column_swap_test_test
+    TABLE id: 3, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -310,7 +310,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.column_swap_test_test
+    TABLE id: 3, alias: __table1, table_name: default.column_swap_test_test
   WHERE
     FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -327,5 +327,5 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: UInt32, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.t_bad_constraint
+    TABLE id: 3, alias: __table1, table_name: default.t_bad_constraint
   SETTINGS allow_experimental_analyzer=1
diff --git a/tests/queries/0_stateless/01646_rewrite_sum_if.reference b/tests/queries/0_stateless/01646_rewrite_sum_if.reference
index 871c75737c6..af582908f03 100644
--- a/tests/queries/0_stateless/01646_rewrite_sum_if.reference
+++ b/tests/queries/0_stateless/01646_rewrite_sum_if.reference
@@ -56,7 +56,7 @@ QUERY id: 0
                               CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8
                         CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 12, table_function_name: numbers
+    TABLE_FUNCTION id: 12, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 15, nodes: 1
           CONSTANT id: 16, constant_value: UInt64_100, constant_value_type: UInt8
@@ -82,7 +82,7 @@ QUERY id: 0
                               CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8
                         CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 12, table_function_name: numbers
+    TABLE_FUNCTION id: 12, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 15, nodes: 1
           CONSTANT id: 16, constant_value: UInt64_100, constant_value_type: UInt8
@@ -111,7 +111,7 @@ QUERY id: 0
                                     CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8
                               CONSTANT id: 16, constant_value: UInt64_0, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 14, table_function_name: numbers
+    TABLE_FUNCTION id: 14, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 17, nodes: 1
           CONSTANT id: 18, constant_value: UInt64_100, constant_value_type: UInt8
diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference
index 54ca55d2068..436d06c5076 100644
--- a/tests/queries/0_stateless/01655_plan_optimizations.reference
+++ b/tests/queries/0_stateless/01655_plan_optimizations.reference
@@ -28,7 +28,7 @@ Aggregating
 Filter
 Filter
 > (analyzer) filter should be pushed down after aggregating, column after aggregation is const
-COLUMN Const(UInt8) -> notEquals(y_1, 0_UInt8)
+COLUMN Const(UInt8) -> notEquals(__table1.y, 0_UInt8)
 Aggregating
 Filter
 Filter
@@ -49,9 +49,9 @@ Aggregating
 Filter column: notEquals(y, 0)
 > (analyzer) one condition of filter should be pushed down after aggregating, other condition is aliased
 Filter column
-ALIAS notEquals(s_0, 4_UInt8) :: 0 -> and(notEquals(y_1, 0_UInt8), notEquals(s_0, 4_UInt8))
+ALIAS notEquals(__table1.s, 4_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 4_UInt8))
 Aggregating
-Filter column: notEquals(y_1, 0_UInt8)
+Filter column: notEquals(__table1.y, 0_UInt8)
 0	1
 1	2
 2	3
@@ -68,9 +68,9 @@ Aggregating
 Filter column: notEquals(y, 0)
 > (analyzer) one condition of filter should be pushed down after aggregating, other condition is casted
 Filter column
-FUNCTION and(minus(s_0, 4_UInt8) :: 0, 1 :: 3) -> and(notEquals(y_1, 0_UInt8), minus(s_0, 4_UInt8)) UInt8 : 2
+FUNCTION and(minus(__table1.s, 4_UInt8) :: 0, 1 :: 3) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 4_UInt8)) UInt8 : 2
 Aggregating
-Filter column: notEquals(y_1, 0_UInt8)
+Filter column: notEquals(__table1.y, 0_UInt8)
 0	1
 1	2
 2	3
@@ -87,9 +87,9 @@ Aggregating
 Filter column: notEquals(y, 0)
 > (analyzer) one condition of filter should be pushed down after aggregating, other two conditions are ANDed
 Filter column
-FUNCTION and(minus(s_0, 8_UInt8) :: 0, minus(s_0, 4_UInt8) :: 2) -> and(notEquals(y_1, 0_UInt8), minus(s_0, 8_UInt8), minus(s_0, 4_UInt8))
+FUNCTION and(minus(__table1.s, 8_UInt8) :: 0, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))
 Aggregating
-Filter column: notEquals(y_1, 0_UInt8)
+Filter column: notEquals(__table1.y, 0_UInt8)
 0	1
 1	2
 2	3
@@ -105,9 +105,9 @@ Aggregating
 Filter column: and(notEquals(y, 0), minus(y, 4))
 > (analyzer) two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased
 Filter column
-ALIAS notEquals(s_0, 8_UInt8) :: 0 -> and(notEquals(y_1, 0_UInt8), notEquals(s_0, 8_UInt8), minus(y_1, 4_UInt8))
+ALIAS notEquals(__table1.s, 8_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 8_UInt8), minus(__table1.y, 4_UInt8))
 Aggregating
-Filter column: and(notEquals(y_1, 0_UInt8), minus(y_1, 4_UInt8))
+Filter column: and(notEquals(__table1.y, 0_UInt8), minus(__table1.y, 4_UInt8))
 0	1
 1	2
 2	3
@@ -121,9 +121,9 @@ Filter column: and(notEquals(y, 2), notEquals(x, 0))
 ARRAY JOIN x
 Filter column: notEquals(y, 2)
 > (analyzer) filter is split, one part is filtered before ARRAY JOIN
-Filter column: and(notEquals(y_1, 2_UInt8), notEquals(x_0, 0_UInt8))
-ARRAY JOIN x_0
-Filter column: notEquals(y_1, 2_UInt8)
+Filter column: and(notEquals(__table2.y, 2_UInt8), notEquals(__table1.x, 0_UInt8))
+ARRAY JOIN __table1.x
+Filter column: notEquals(__table2.y, 2_UInt8)
 1	3
 > filter is pushed down before Distinct
 Distinct
@@ -132,7 +132,7 @@ Filter column: notEquals(y, 2)
 > (analyzer) filter is pushed down before Distinct
 Distinct
 Distinct
-Filter column: notEquals(y_1, 2_UInt8)
+Filter column: notEquals(__table1.y, 2_UInt8)
 0	0
 0	1
 1	0
@@ -144,7 +144,7 @@ Filter column: and(notEquals(x, 0), notEquals(y, 0))
 > (analyzer) filter is pushed down before sorting steps
 Sorting
 Sorting
-Filter column: and(notEquals(x_0, 0_UInt8), notEquals(y_1, 0_UInt8))
+Filter column: and(notEquals(__table1.x, 0_UInt8), notEquals(__table1.y, 0_UInt8))
 1	2
 1	1
 > filter is pushed down before TOTALS HAVING and aggregating
@@ -154,7 +154,7 @@ Filter column: notEquals(y, 2)
 > (analyzer) filter is pushed down before TOTALS HAVING and aggregating
 TotalsHaving
 Aggregating
-Filter column: notEquals(y_0, 2_UInt8)
+Filter column: notEquals(__table1.y, 2_UInt8)
 0	12
 1	15
 3	10
@@ -174,7 +174,7 @@ Join
 > (analyzer) one condition of filter is pushed down before LEFT JOIN
 Join
 Join
-Filter column: notEquals(number_0, 1_UInt8)
+Filter column: notEquals(__table1.number, 1_UInt8)
 0	0
 3	3
 > one condition of filter is pushed down before INNER JOIN
@@ -185,7 +185,7 @@ Join
 > (analyzer) one condition of filter is pushed down before INNER JOIN
 Join
 Join
-Filter column: notEquals(number_0, 1_UInt8)
+Filter column: notEquals(__table1.number, 1_UInt8)
 3	3
 > filter is pushed down before UNION
 Union
diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh
index a765a6ea4fa..5a517264243 100755
--- a/tests/queries/0_stateless/01655_plan_optimizations.sh
+++ b/tests/queries/0_stateless/01655_plan_optimizations.sh
@@ -36,7 +36,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
     explain actions = 1 select s, y, y != 0 from (select sum(x) as s, y from (
         select number as x, number + 1 as y from numbers(10)) group by y
     ) where y != 0
-    settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter\|COLUMN Const(UInt8) -> notEquals(y_1, 0_UInt8)"
+    settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter\|COLUMN Const(UInt8) -> notEquals(__table1.y, 0_UInt8)"
 $CLICKHOUSE_CLIENT -q "
     select s, y, y != 0 from (select sum(x) as s, y from (
         select number as x, number + 1 as y from numbers(10)) group by y
@@ -56,7 +56,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
     ) where y != 0 and s != 4
     settings enable_optimize_predicate_expression=0" |
-        grep -o "Aggregating\|Filter column\|Filter column: notEquals(y_1, 0_UInt8)\|ALIAS notEquals(s_0, 4_UInt8) :: 0 -> and(notEquals(y_1, 0_UInt8), notEquals(s_0, 4_UInt8))"
+        grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|ALIAS notEquals(__table1.s, 4_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 4_UInt8))"
 $CLICKHOUSE_CLIENT -q "
     select s, y from (
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
@@ -76,7 +76,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
     ) where y != 0 and s - 4
     settings enable_optimize_predicate_expression=0" |
-        grep -o "Aggregating\|Filter column\|Filter column: notEquals(y_1, 0_UInt8)\|FUNCTION and(minus(s_0, 4_UInt8) :: 0, 1 :: 3) -> and(notEquals(y_1, 0_UInt8), minus(s_0, 4_UInt8)) UInt8 : 2"
+        grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 4_UInt8) :: 0, 1 :: 3) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 4_UInt8)) UInt8 : 2"
 $CLICKHOUSE_CLIENT -q "
     select s, y from (
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
@@ -96,7 +96,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 --convert_query_to_cnf=0 -q "
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
     ) where y != 0 and s - 8 and s - 4
     settings enable_optimize_predicate_expression=0" |
-        grep -o "Aggregating\|Filter column\|Filter column: notEquals(y_1, 0_UInt8)\|FUNCTION and(minus(s_0, 8_UInt8) :: 0, minus(s_0, 4_UInt8) :: 2) -> and(notEquals(y_1, 0_UInt8), minus(s_0, 8_UInt8), minus(s_0, 4_UInt8))"
+        grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 8_UInt8) :: 0, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))"
 $CLICKHOUSE_CLIENT -q "
     select s, y from (
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
@@ -116,7 +116,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 --convert_query_to_cnf=0 -q "
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
     ) where y != 0 and s != 8 and y - 4
     settings enable_optimize_predicate_expression=0" |
-    grep -o "Aggregating\|Filter column\|Filter column: and(notEquals(y_1, 0_UInt8), minus(y_1, 4_UInt8))\|ALIAS notEquals(s_0, 8_UInt8) :: 0 -> and(notEquals(y_1, 0_UInt8), notEquals(s_0, 8_UInt8), minus(y_1, 4_UInt8))"
+    grep -o "Aggregating\|Filter column\|Filter column: and(notEquals(__table1.y, 0_UInt8), minus(__table1.y, 4_UInt8))\|ALIAS notEquals(__table1.s, 8_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 8_UInt8), minus(__table1.y, 4_UInt8))"
 $CLICKHOUSE_CLIENT -q "
     select s, y from (
         select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y
@@ -134,7 +134,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
     explain actions = 1 select x, y from (
         select range(number) as x, number + 1 as y from numbers(3)
     ) array join x where y != 2 and x != 0" |
-    grep -o "Filter column: and(notEquals(y_1, 2_UInt8), notEquals(x_0, 0_UInt8))\|ARRAY JOIN x_0\|Filter column: notEquals(y_1, 2_UInt8)"
+    grep -o "Filter column: and(notEquals(__table2.y, 2_UInt8), notEquals(__table1.x, 0_UInt8))\|ARRAY JOIN __table1.x\|Filter column: notEquals(__table2.y, 2_UInt8)"
 $CLICKHOUSE_CLIENT -q "
     select x, y from (
         select range(number) as x, number + 1 as y from numbers(3)
@@ -166,7 +166,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
         select distinct x, y from (select number % 2 as x, number % 3 as y from numbers(10))
     ) where y != 2
     settings enable_optimize_predicate_expression=0" |
-    grep -o "Distinct\|Filter column: notEquals(y_1, 2_UInt8)"
+    grep -o "Distinct\|Filter column: notEquals(__table1.y, 2_UInt8)"
 $CLICKHOUSE_CLIENT -q "
     select x, y from (
         select distinct x, y from (select number % 2 as x, number % 3 as y from numbers(10))
@@ -186,7 +186,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 --convert_query_to_cnf=0 -q "
         select number % 2 as x, number % 3 as y from numbers(6) order by y desc
     ) where x != 0 and y != 0
     settings enable_optimize_predicate_expression = 0" |
-    grep -o "Sorting\|Filter column: and(notEquals(x_0, 0_UInt8), notEquals(y_1, 0_UInt8))"
+    grep -o "Sorting\|Filter column: and(notEquals(__table1.x, 0_UInt8), notEquals(__table1.y, 0_UInt8))"
 $CLICKHOUSE_CLIENT -q "
     select x, y from (
         select number % 2 as x, number % 3 as y from numbers(6) order by y desc
@@ -206,7 +206,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
         select y, sum(x) from (select number as x, number % 4 as y from numbers(10)) group by y with totals
     ) where y != 2
     settings enable_optimize_predicate_expression=0" |
-    grep -o "TotalsHaving\|Aggregating\|Filter column: notEquals(y_0, 2_UInt8)"
+    grep -o "TotalsHaving\|Aggregating\|Filter column: notEquals(__table1.y, 2_UInt8)"
 $CLICKHOUSE_CLIENT -q "
     select * from (
         select y, sum(x) from (select number as x, number % 4 as y from numbers(10)) group by y with totals
@@ -236,7 +236,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
     select number as a, r.b from numbers(4) as l any left join (
         select number + 2 as b from numbers(3)
     ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" |
-    grep -o "Join\|Filter column: notEquals(number_0, 1_UInt8)"
+    grep -o "Join\|Filter column: notEquals(__table1.number, 1_UInt8)"
 $CLICKHOUSE_CLIENT -q "
     select number as a, r.b from numbers(4) as l any left join (
         select number + 2 as b from numbers(3)
@@ -255,7 +255,7 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q "
     select number as a, r.b from numbers(4) as l any inner join (
         select number + 2 as b from numbers(3)
     ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" |
-        grep -o "Join\|Filter column: notEquals(number_0, 1_UInt8)"
+        grep -o "Join\|Filter column: notEquals(__table1.number, 1_UInt8)"
 $CLICKHOUSE_CLIENT -q "
     select number as a, r.b from numbers(4) as l any inner join (
         select number + 2 as b from numbers(3)
diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference b/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference
index d741391067c..0ff24b39709 100644
--- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference
+++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference
@@ -11,7 +11,7 @@ QUERY id: 0
           LIST id: 3, nodes: 1
             CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
   JOIN TREE
-    TABLE id: 5, table_name: system.one
+    TABLE id: 5, alias: __table1, table_name: system.one
   WHERE
     FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -54,7 +54,7 @@ QUERY id: 0
           LIST id: 3, nodes: 1
             CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
   JOIN TREE
-    TABLE id: 5, table_name: system.one
+    TABLE id: 5, alias: __table1, table_name: system.one
   WHERE
     FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8
       ARGUMENTS
diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference
index 685b3c83b05..ed4c9e5cee2 100644
--- a/tests/queries/0_stateless/02227_union_match_by_name.reference
+++ b/tests/queries/0_stateless/02227_union_match_by_name.reference
@@ -4,15 +4,15 @@ EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255
 Expression (Project names)
 Header: avgWeighted(x, y) Nullable(Float64)
   Expression (Projection)
-  Header: avgWeighted(x_0, y_1) Nullable(Float64)
+  Header: avgWeighted(__table1.x, __table1.y) Nullable(Float64)
     Aggregating
-    Header: avgWeighted(x_0, y_1) Nullable(Float64)
+    Header: avgWeighted(__table1.x, __table1.y) Nullable(Float64)
       Expression (Before GROUP BY)
-      Header: x_0 Nullable(UInt8)
-              y_1 UInt8
+      Header: __table1.x Nullable(UInt8)
+              __table1.y UInt8
         Expression (Change column names to column identifiers)
-        Header: x_0 Nullable(UInt8)
-                y_1 UInt8
+        Header: __table1.x Nullable(UInt8)
+                __table1.y UInt8
           Union
           Header: NULL Nullable(UInt8)
                   x Nullable(UInt8)
@@ -30,7 +30,7 @@ Header: avgWeighted(x, y) Nullable(Float64)
                         255_UInt8 UInt8
                         1_UInt8 UInt8
                   Expression (Change column names to column identifiers)
-                  Header: dummy_0 UInt8
+                  Header: __table3.dummy UInt8
                     ReadFromStorage (SystemOne)
                     Header: dummy UInt8
             Expression (Conversion before UNION)
@@ -46,7 +46,7 @@ Header: avgWeighted(x, y) Nullable(Float64)
                         NULL_Nullable(Nothing) Nullable(Nothing)
                         1_UInt8 UInt8
                   Expression (Change column names to column identifiers)
-                  Header: dummy_0 UInt8
+                  Header: __table5.dummy UInt8
                     ReadFromStorage (SystemOne)
                     Header: dummy UInt8
 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
diff --git a/tests/queries/0_stateless/02303_query_kind.reference b/tests/queries/0_stateless/02303_query_kind.reference
index 8d119fb22b2..53a0df682b2 100644
--- a/tests/queries/0_stateless/02303_query_kind.reference
+++ b/tests/queries/0_stateless/02303_query_kind.reference
@@ -2,35 +2,35 @@ clickhouse-client --allow_experimental_analyzer=1 --query_kind secondary_query -
 Expression ((Project names + Projection))
 Header: dummy String
   Aggregating
-  Header: toString(dummy_0) String
+  Header: toString(__table1.dummy) String
     Expression ((Before GROUP BY + Change column names to column identifiers))
-    Header: toString(dummy_0) String
+    Header: toString(__table1.dummy) String
       ReadFromStorage (SystemOne)
       Header: dummy UInt8
 clickhouse-local --allow_experimental_analyzer=1 --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy
 Expression ((Project names + Projection))
 Header: dummy String
   Aggregating
-  Header: toString(dummy_0) String
+  Header: toString(__table1.dummy) String
     Expression ((Before GROUP BY + Change column names to column identifiers))
-    Header: toString(dummy_0) String
+    Header: toString(__table1.dummy) String
       ReadFromStorage (SystemOne)
       Header: dummy UInt8
 clickhouse-client --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy
 Expression ((Project names + Projection))
 Header: dummy String
   Aggregating
-  Header: toString(dummy_0) String
+  Header: toString(__table1.dummy) String
     Expression ((Before GROUP BY + Change column names to column identifiers))
-    Header: toString(dummy_0) String
+    Header: toString(__table1.dummy) String
       ReadFromStorage (SystemOne)
       Header: dummy UInt8
 clickhouse-local --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy
 Expression ((Project names + Projection))
 Header: dummy String
   Aggregating
-  Header: toString(dummy_0) String
+  Header: toString(__table1.dummy) String
     Expression ((Before GROUP BY + Change column names to column identifiers))
-    Header: toString(dummy_0) String
+    Header: toString(__table1.dummy) String
       ReadFromStorage (SystemOne)
       Header: dummy UInt8
diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference
index 21ce47cc685..cacd2b86ce0 100644
--- a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference
+++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference
@@ -83,36 +83,36 @@ Sorting (Stream): a ASC, b ASC
 Sorting (Stream): a ASC, b ASC
 === enable new analyzer ===
 -- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct
-Sorting (Stream): a_1 ASC, b_0 ASC
-Sorting (Stream): a_1 ASC, b_0 ASC
-Sorting (Stream): a_1 ASC, b_0 ASC
-Sorting (Stream): a_1 ASC, b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, b ASC
 -- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns
-Sorting (Stream): a_1 ASC
-Sorting (Stream): a_1 ASC
-Sorting (Stream): a_1 ASC
+Sorting (Stream): __table1.a ASC
+Sorting (Stream): __table1.a ASC
+Sorting (Stream): __table1.a ASC
 Sorting (Stream): a ASC
 -- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause
-Sorting (Stream): a_1 ASC, b_0 ASC
-Sorting (Stream): a_1 ASC, b_0 ASC
-Sorting (Stream): a_1 ASC, b_0 ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
 Sorting (Stream): a ASC, b ASC
 -- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause
-Sorting (Stream): a_1 DESC, b_0 DESC
-Sorting (Stream): a_1 DESC, b_0 DESC
-Sorting (Stream): a_1 DESC, b_0 DESC
+Sorting (Stream): __table1.a DESC, __table1.b DESC
+Sorting (Stream): __table1.a DESC, __table1.b DESC
+Sorting (Stream): __table1.a DESC, __table1.b DESC
 Sorting (Stream): a DESC, b DESC
 -- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause
-Sorting (Stream): a_0 ASC, b_1 ASC
-Sorting (Stream): a_0 ASC, b_1 ASC
-Sorting (Stream): a_0 ASC, b_1 ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
 Sorting (Stream): a ASC, b ASC
 -- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause
-Sorting (Stream): a_1 DESC, b_0 DESC
-Sorting (Stream): a_1 DESC, b_0 DESC
-Sorting (Stream): a_1 DESC, b_0 DESC
+Sorting (Stream): __table1.a DESC, __table1.b DESC
+Sorting (Stream): __table1.a DESC, __table1.b DESC
+Sorting (Stream): __table1.a DESC, __table1.b DESC
 Sorting (Stream): a DESC, b DESC
 -- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization
-Sorting (Stream): a_0 ASC, b_1 ASC
-Sorting (Stream): a_0 ASC, b_1 ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
+Sorting (Stream): __table1.a ASC, __table1.b ASC
 Sorting (Stream): a ASC, b ASC
diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference
index 5c9e39805b7..2c50d1028fe 100644
--- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference
+++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference
@@ -8,7 +8,7 @@ Sorting (None)
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a
 Sorting (Global): a ASC
 Sorting (Sorting for ORDER BY)
-Sorting (Global): a_0 ASC
+Sorting (Global): __table1.a ASC
 Sorting (None)
 Sorting (None)
 -- disable optimization -> sorting order is NOT propagated from subquery -> full sort
@@ -36,8 +36,8 @@ Sorting (Stream): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a
 Sorting (Global): a ASC
 Sorting (Sorting for ORDER BY)
-Sorting (Global): a_0 ASC
-Sorting (Stream): a_0 ASC
+Sorting (Global): __table1.a ASC
+Sorting (Stream): __table1.a ASC
 Sorting (Stream): a ASC
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1
 Sorting (None)
@@ -48,8 +48,8 @@ Sorting (Chunk): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1
 Sorting (None)
 Sorting (Sorting for ORDER BY)
-Sorting (Global): plus(a_0, 1_UInt8) ASC
-Sorting (Chunk): a_0 ASC
+Sorting (Global): plus(__table1.a, 1_UInt8) ASC
+Sorting (Chunk): __table1.a ASC
 Sorting (Chunk): a ASC
 -- ExpressionStep breaks sort mode
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1
@@ -61,7 +61,7 @@ Sorting (Chunk): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1
 Sorting (Global): plus(a, 1) ASC
 Sorting (Sorting for ORDER BY)
-Sorting (Global): plus(a_0, 1_UInt8) ASC
+Sorting (Global): plus(__table1.a, 1_UInt8) ASC
 Sorting (None)
 Sorting (Chunk): a ASC
 -- FilterStep preserves sort mode
@@ -71,7 +71,7 @@ Sorting (Chunk): a ASC
 Sorting (Chunk): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0
 Sorting (Chunk): a ASC
-Sorting (Chunk): a_0 ASC
+Sorting (Chunk): __table1.a ASC
 Sorting (Chunk): a ASC
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0
 Sorting (Chunk): a ASC
@@ -79,7 +79,7 @@ Sorting (Chunk): a ASC
 Sorting (Chunk): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0
 Sorting (Chunk): a ASC
-Sorting (Chunk): a_0 ASC
+Sorting (Chunk): __table1.a ASC
 Sorting (Chunk): a ASC
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0
 Sorting (Chunk): a ASC
@@ -87,7 +87,7 @@ Sorting (Chunk): a ASC
 Sorting (Chunk): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0
 Sorting (Chunk): a ASC
-Sorting (Chunk): a_0 ASC
+Sorting (Chunk): __table1.a ASC
 Sorting (Chunk): a ASC
 -- FilterStep breaks sort mode
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0
@@ -119,11 +119,11 @@ Sorting (Stream): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a
 Sorting (Global): a ASC
 Sorting (Sorting for ORDER BY)
-Sorting (Global): a_0 ASC
+Sorting (Global): __table1.a ASC
 Sorting (None)
 Sorting (Sorting for ORDER BY)
-Sorting (Global): a_2 ASC
-Sorting (Stream): a_2 ASC
+Sorting (Global): __table3.a ASC
+Sorting (Stream): __table3.a ASC
 Sorting (Stream): a ASC
 -- aliases DONT break sorting order
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y)
@@ -135,8 +135,8 @@ Sorting (Stream): a ASC, b ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y)
 Sorting (Global): a ASC, b ASC
 Sorting (Sorting for ORDER BY)
-Sorting (Global): x_2 ASC, y_3 ASC
-Sorting (Stream): x_2 ASC, y_3 ASC
+Sorting (Global): __table2.x ASC, __table2.y ASC
+Sorting (Stream): __table2.x ASC, __table2.y ASC
 Sorting (Stream): a ASC, b ASC
 -- actions chain breaks sorting order: input(column a)->sipHash64(column a)->alias(sipHash64(column a), a)->plus(alias a, 1)
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1
@@ -151,11 +151,11 @@ Sorting (Chunk): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1
 Sorting (None)
 Sorting (Sorting for ORDER BY)
-Sorting (Global): plus(a_0, 1_UInt8) ASC
-Sorting (Global): plus(a_3, 1_UInt8) ASC
+Sorting (Global): plus(__table1.a, 1_UInt8) ASC
+Sorting (Global): plus(__table3.a, 1_UInt8) ASC
 Sorting (Sorting for ORDER BY)
-Sorting (Global): plus(a_3, 1_UInt8) ASC
-Sorting (Chunk): a_3 ASC
+Sorting (Global): plus(__table3.a, 1_UInt8) ASC
+Sorting (Chunk): __table3.a ASC
 Sorting (Chunk): a ASC
 -- check that correct sorting info is provided in case of only prefix of sorting key is in ORDER BY clause but all sorting key columns returned by query
 -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a
@@ -167,6 +167,6 @@ Sorting (Stream): a ASC
 -- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a
 Sorting (Global): a ASC
 Sorting (Sorting for ORDER BY)
-Sorting (Global): a_0 ASC
-Sorting (Stream): a_0 ASC
+Sorting (Global): __table1.a ASC
+Sorting (Stream): __table1.a ASC
 Sorting (Stream): a ASC
diff --git a/tests/queries/0_stateless/02451_order_by_monotonic.reference b/tests/queries/0_stateless/02451_order_by_monotonic.reference
index 05f20a9bad8..4b2f9f7e227 100644
--- a/tests/queries/0_stateless/02451_order_by_monotonic.reference
+++ b/tests/queries/0_stateless/02451_order_by_monotonic.reference
@@ -4,19 +4,19 @@
 2022-09-09 12:00:00	0x
 2022-09-09 12:00:00	1
 2022-09-09 12:00:00	1x
-  Prefix sort description: toStartOfMinute(t_0) ASC
-  Result sort description: toStartOfMinute(t_0) ASC, c1_1 ASC
-  Prefix sort description: toStartOfMinute(t_0) ASC
-  Result sort description: toStartOfMinute(t_0) ASC
-  Prefix sort description: negate(a_0) ASC
-  Result sort description: negate(a_0) ASC
-  Prefix sort description: negate(a_0) ASC, negate(b_1) ASC
-  Result sort description: negate(a_0) ASC, negate(b_1) ASC
-  Prefix sort description: a_0 DESC, negate(b_1) ASC
-  Result sort description: a_0 DESC, negate(b_1) ASC
-  Prefix sort description: negate(a_0) ASC, b_1 DESC
-  Result sort description: negate(a_0) ASC, b_1 DESC
-  Prefix sort description: negate(a_0) ASC
-  Result sort description: negate(a_0) ASC, b_1 ASC
-  Prefix sort description: a_0 ASC
-  Result sort description: a_0 ASC, negate(b_1) ASC
+  Prefix sort description: toStartOfMinute(__table1.t) ASC
+  Result sort description: toStartOfMinute(__table1.t) ASC, __table1.c1 ASC
+  Prefix sort description: toStartOfMinute(__table1.t) ASC
+  Result sort description: toStartOfMinute(__table1.t) ASC
+  Prefix sort description: negate(__table1.a) ASC
+  Result sort description: negate(__table1.a) ASC
+  Prefix sort description: negate(__table1.a) ASC, negate(__table1.b) ASC
+  Result sort description: negate(__table1.a) ASC, negate(__table1.b) ASC
+  Prefix sort description: __table1.a DESC, negate(__table1.b) ASC
+  Result sort description: __table1.a DESC, negate(__table1.b) ASC
+  Prefix sort description: negate(__table1.a) ASC, __table1.b DESC
+  Result sort description: negate(__table1.a) ASC, __table1.b DESC
+  Prefix sort description: negate(__table1.a) ASC
+  Result sort description: negate(__table1.a) ASC, __table1.b ASC
+  Prefix sort description: __table1.a ASC
+  Result sort description: __table1.a ASC, negate(__table1.b) ASC
diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference
index 84589668d64..048a9000f2b 100644
--- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference
+++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference
@@ -8,7 +8,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality
+    TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality
   WHERE
     FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -26,7 +26,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality
+    TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality
   WHERE
     FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -44,7 +44,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality
+    TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality
   WHERE
     FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8
       ARGUMENTS
diff --git a/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.reference b/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.reference
index eccf51501ed..23e91dc2703 100644
--- a/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.reference
+++ b/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.reference
@@ -16,7 +16,7 @@ QUERY id: 0
                         CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8
                   CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8
@@ -41,7 +41,7 @@ QUERY id: 0
                         CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8
                   CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8
@@ -69,7 +69,7 @@ QUERY id: 0
                               CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
                         CONSTANT id: 13, constant_value: UInt64_0, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 14, nodes: 1
           CONSTANT id: 15, constant_value: UInt64_10, constant_value_type: UInt8
diff --git a/tests/queries/0_stateless/02498_analyzer_settings_push_down.reference b/tests/queries/0_stateless/02498_analyzer_settings_push_down.reference
index 583da07380e..f24edd96996 100644
--- a/tests/queries/0_stateless/02498_analyzer_settings_push_down.reference
+++ b/tests/queries/0_stateless/02498_analyzer_settings_push_down.reference
@@ -12,7 +12,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         value UInt64
       PROJECTION
@@ -23,7 +23,7 @@ QUERY id: 0
                 COLUMN id: 7, column_name: value, result_type: Tuple(a UInt64), source_id: 8
                 CONSTANT id: 9, constant_value: \'a\', constant_value_type: String
       JOIN TREE
-        TABLE id: 8, table_name: default.test_table
+        TABLE id: 8, alias: __table2, table_name: default.test_table
 SELECT '--';
 --
 EXPLAIN QUERY TREE SELECT value FROM (
@@ -36,14 +36,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         value UInt64
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: value.a, result_type: UInt64, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_table
+        TABLE id: 6, alias: __table2, table_name: default.test_table
   SETTINGS optimize_functions_to_subcolumns=1
 SELECT '--';
 --
@@ -57,7 +57,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         value UInt64
       PROJECTION
@@ -68,7 +68,7 @@ QUERY id: 0
                 COLUMN id: 7, column_name: value, result_type: Tuple(a UInt64), source_id: 8
                 CONSTANT id: 9, constant_value: \'a\', constant_value_type: String
       JOIN TREE
-        TABLE id: 8, table_name: default.test_table
+        TABLE id: 8, alias: __table2, table_name: default.test_table
       SETTINGS optimize_functions_to_subcolumns=0
   SETTINGS optimize_functions_to_subcolumns=1
 SELECT '--';
@@ -83,7 +83,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         value UInt64
       PROJECTION
@@ -94,7 +94,7 @@ QUERY id: 0
                 COLUMN id: 7, column_name: value, result_type: Tuple(a UInt64), source_id: 8
                 CONSTANT id: 9, constant_value: \'a\', constant_value_type: String
       JOIN TREE
-        TABLE id: 8, table_name: default.test_table
+        TABLE id: 8, alias: __table2, table_name: default.test_table
   SETTINGS optimize_functions_to_subcolumns=0
 SELECT '--';
 --
@@ -108,13 +108,13 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         value UInt64
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: value.a, result_type: UInt64, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_table
+        TABLE id: 6, alias: __table2, table_name: default.test_table
       SETTINGS optimize_functions_to_subcolumns=1
   SETTINGS optimize_functions_to_subcolumns=0
diff --git a/tests/queries/0_stateless/02564_analyzer_cross_to_inner.reference b/tests/queries/0_stateless/02564_analyzer_cross_to_inner.reference
index e4d7ff55b86..5b9bc206695 100644
--- a/tests/queries/0_stateless/02564_analyzer_cross_to_inner.reference
+++ b/tests/queries/0_stateless/02564_analyzer_cross_to_inner.reference
@@ -29,9 +29,9 @@ QUERY id: 0
       LEFT TABLE EXPRESSION
         JOIN id: 11, strictness: ALL, kind: INNER
           LEFT TABLE EXPRESSION
-            TABLE id: 3, table_name: default.t1
+            TABLE id: 3, alias: __table1, table_name: default.t1
           RIGHT TABLE EXPRESSION
-            TABLE id: 6, table_name: default.t2
+            TABLE id: 6, alias: __table2, table_name: default.t2
           JOIN EXPRESSION
             FUNCTION id: 12, function_name: equals, function_type: ordinary, result_type: UInt8
               ARGUMENTS
@@ -48,14 +48,14 @@ QUERY id: 0
                         COLUMN id: 21, column_name: a, result_type: UInt64, source_id: 6
                         CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8
       RIGHT TABLE EXPRESSION
-        QUERY id: 9, alias: t3, is_subquery: 1
+        QUERY id: 9, alias: __table3, is_subquery: 1
           PROJECTION COLUMNS
             x UInt64
           PROJECTION
             LIST id: 23, nodes: 1
               COLUMN id: 24, column_name: a, result_type: UInt64, source_id: 25
           JOIN TREE
-            TABLE id: 25, table_name: default.t3
+            TABLE id: 25, alias: __table4, table_name: default.t3
           WHERE
             FUNCTION id: 26, function_name: equals, function_type: ordinary, result_type: UInt8
               ARGUMENTS
@@ -97,18 +97,18 @@ QUERY id: 0
       LEFT TABLE EXPRESSION
         JOIN id: 11, kind: COMMA
           LEFT TABLE EXPRESSION
-            TABLE id: 3, table_name: default.t1
+            TABLE id: 3, alias: __table1, table_name: default.t1
           RIGHT TABLE EXPRESSION
-            TABLE id: 6, table_name: default.t2
+            TABLE id: 6, alias: __table2, table_name: default.t2
       RIGHT TABLE EXPRESSION
-        QUERY id: 9, alias: t3, is_subquery: 1
+        QUERY id: 9, alias: __table3, is_subquery: 1
           PROJECTION COLUMNS
             x UInt64
           PROJECTION
             LIST id: 12, nodes: 1
               COLUMN id: 13, column_name: a, result_type: UInt64, source_id: 14
           JOIN TREE
-            TABLE id: 14, table_name: default.t3
+            TABLE id: 14, alias: __table4, table_name: default.t3
           WHERE
             FUNCTION id: 15, function_name: equals, function_type: ordinary, result_type: UInt8
               ARGUMENTS
@@ -166,9 +166,9 @@ QUERY id: 0
       LEFT TABLE EXPRESSION
         JOIN id: 11, strictness: ALL, kind: INNER
           LEFT TABLE EXPRESSION
-            TABLE id: 3, table_name: default.t1
+            TABLE id: 3, alias: __table1, table_name: default.t1
           RIGHT TABLE EXPRESSION
-            TABLE id: 6, table_name: default.t2
+            TABLE id: 6, alias: __table2, table_name: default.t2
           JOIN EXPRESSION
             FUNCTION id: 12, function_name: equals, function_type: ordinary, result_type: UInt8
               ARGUMENTS
@@ -185,14 +185,14 @@ QUERY id: 0
                         COLUMN id: 21, column_name: a, result_type: UInt64, source_id: 6
                         CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8
       RIGHT TABLE EXPRESSION
-        QUERY id: 9, alias: t3, is_subquery: 1
+        QUERY id: 9, alias: __table3, is_subquery: 1
           PROJECTION COLUMNS
             x UInt64
           PROJECTION
             LIST id: 23, nodes: 1
               COLUMN id: 24, column_name: a, result_type: UInt64, source_id: 25
           JOIN TREE
-            TABLE id: 25, table_name: default.t3
+            TABLE id: 25, alias: __table4, table_name: default.t3
           WHERE
             FUNCTION id: 26, function_name: equals, function_type: ordinary, result_type: UInt8
               ARGUMENTS
diff --git a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference
index 3d169126eef..ed5e1e08356 100644
--- a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference
+++ b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference
@@ -1,21 +1,21 @@
 Expression ((Project names + (Projection + )))
 Header: number UInt64
-Actions: INPUT : 0 -> number_1 UInt64 : 0
-         ALIAS number_1 :: 0 -> number UInt64 : 1
-         ALIAS number :: 1 -> number_0 UInt64 : 0
-         ALIAS number_0 :: 0 -> number UInt64 : 1
+Actions: INPUT : 0 -> __table2.number UInt64 : 0
+         ALIAS __table2.number :: 0 -> number UInt64 : 1
+         ALIAS number :: 1 -> __table1.number UInt64 : 0
+         ALIAS __table1.number :: 0 -> number UInt64 : 1
 Positions: 1
   Sorting (Sorting for ORDER BY)
   Header: ignore(2_UInt8) UInt8
-          number_1 UInt64
+          __table2.number UInt64
   Sort description: ignore(2_UInt8) ASC
     Filter (( + (Before ORDER BY + (Projection + Change column names to column identifiers))))
     Header: ignore(2_UInt8) UInt8
-            number_1 UInt64
+            __table2.number UInt64
     Filter column: ignore(2_UInt8)
     Actions: INPUT : 0 -> number UInt64 : 0
              COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 1
-             ALIAS number :: 0 -> number_1 UInt64 : 2
+             ALIAS number :: 0 -> __table2.number UInt64 : 2
              FUNCTION ignore(2_UInt8 :: 1) -> ignore(2_UInt8) UInt8 : 0
     Positions: 0 2
       ReadFromStorage (SystemNumbers)
diff --git a/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference b/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference
index b6964976c20..f4e09c4b4de 100644
--- a/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference
+++ b/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference
@@ -26,7 +26,7 @@ QUERY id: 0
                           LIST id: 14, nodes: 1
                             CONSTANT id: 15, constant_value: UInt64_10, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 16, table_function_name: numbers
+    TABLE_FUNCTION id: 16, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 17, nodes: 1
           CONSTANT id: 18, constant_value: UInt64_10, constant_value_type: UInt8
@@ -58,7 +58,7 @@ QUERY id: 0
                           LIST id: 14, nodes: 1
                             CONSTANT id: 15, constant_value: UInt64_10, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 16, table_function_name: numbers
+    TABLE_FUNCTION id: 16, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 17, nodes: 1
           CONSTANT id: 18, constant_value: UInt64_10, constant_value_type: UInt8
@@ -81,7 +81,7 @@ QUERY id: 0
                             CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8
             CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8
@@ -104,7 +104,7 @@ QUERY id: 0
                             CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8
             CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 11, table_function_name: numbers
+    TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 12, nodes: 1
           CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8
diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference
index 986ecffcdf8..68aa35378f8 100644
--- a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference
+++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference
@@ -2,27 +2,27 @@ Expression ((Project names + (Projection + )))
 Header: t1.id UInt64
         t1.value String
         t2.value String
-Actions: INPUT : 0 -> id_0 UInt64 : 0
-         INPUT : 1 -> value_1 String : 1
-         INPUT : 2 -> value_2 String : 2
-         ALIAS id_0 :: 0 -> t1.id UInt64 : 3
-         ALIAS value_1 :: 1 -> t1.value String : 0
-         ALIAS value_2 :: 2 -> t2.value String : 1
+Actions: INPUT : 0 -> __table1.id UInt64 : 0
+         INPUT : 1 -> __table1.value String : 1
+         INPUT : 2 -> __table2.value String : 2
+         ALIAS __table1.id :: 0 -> t1.id UInt64 : 3
+         ALIAS __table1.value :: 1 -> t1.value String : 0
+         ALIAS __table2.value :: 2 -> t2.value String : 1
 Positions: 3 0 1
   FilledJoin (Filled JOIN)
-  Header: id_0 UInt64
-          value_1 String
-          value_2 String
+  Header: __table1.id UInt64
+          __table1.value String
+          __table2.value String
     Filter (( + (JOIN actions + Change column names to column identifiers)))
-    Header: id_0 UInt64
-            value_1 String
-    Filter column: equals(id_0, 0_UInt8) (removed)
+    Header: __table1.id UInt64
+            __table1.value String
+    Filter column: equals(__table1.id, 0_UInt8) (removed)
     Actions: INPUT : 0 -> id UInt64 : 0
              INPUT : 1 -> value String : 1
              COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 2
-             ALIAS id :: 0 -> id_0 UInt64 : 3
-             ALIAS value :: 1 -> value_1 String : 0
-             FUNCTION equals(id_0 : 3, 0_UInt8 :: 2) -> equals(id_0, 0_UInt8) UInt8 : 1
+             ALIAS id :: 0 -> __table1.id UInt64 : 3
+             ALIAS value :: 1 -> __table1.value String : 0
+             FUNCTION equals(__table1.id : 3, 0_UInt8 :: 2) -> equals(__table1.id, 0_UInt8) UInt8 : 1
     Positions: 1 3 0
       ReadFromMergeTree (default.test_table)
       Header: id UInt64
diff --git a/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.reference b/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.reference
index cc16a1fce02..4a4e338438b 100644
--- a/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.reference
+++ b/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.reference
@@ -29,10 +29,10 @@ Header: id UInt64
         value String
 Actions: INPUT : 0 -> id UInt64 : 0
          INPUT : 1 -> value String : 1
-         ALIAS id :: 0 -> id_0 UInt64 : 2
-         ALIAS value :: 1 -> value_1 String : 0
-         ALIAS id_0 :: 2 -> id UInt64 : 1
-         ALIAS value_1 :: 0 -> value String : 2
+         ALIAS id :: 0 -> __table1.id UInt64 : 2
+         ALIAS value :: 1 -> __table1.value String : 0
+         ALIAS __table1.id :: 2 -> id UInt64 : 1
+         ALIAS __table1.value :: 0 -> value String : 2
 Positions: 1 2
   ReadFromMergeTree (default.test_table)
   Header: id UInt64
diff --git a/tests/queries/0_stateless/02835_join_step_explain.reference b/tests/queries/0_stateless/02835_join_step_explain.reference
index 0cc2e802682..06f4a9cfc99 100644
--- a/tests/queries/0_stateless/02835_join_step_explain.reference
+++ b/tests/queries/0_stateless/02835_join_step_explain.reference
@@ -3,31 +3,31 @@ Header: id UInt64
         value_1 String
         rhs.id UInt64
         rhs.value_1 String
-Actions: INPUT : 0 -> id_0 UInt64 : 0
-         INPUT : 1 -> value_1_1 String : 1
-         INPUT : 2 -> value_1_3 String : 2
-         INPUT : 3 -> id_2 UInt64 : 3
-         ALIAS id_0 :: 0 -> id UInt64 : 4
-         ALIAS value_1_1 :: 1 -> value_1 String : 0
-         ALIAS value_1_3 :: 2 -> rhs.value_1 String : 1
-         ALIAS id_2 :: 3 -> rhs.id UInt64 : 2
+Actions: INPUT : 0 -> __table1.id UInt64 : 0
+         INPUT : 1 -> __table1.value_1 String : 1
+         INPUT : 2 -> __table2.value_1 String : 2
+         INPUT : 3 -> __table2.id UInt64 : 3
+         ALIAS __table1.id :: 0 -> id UInt64 : 4
+         ALIAS __table1.value_1 :: 1 -> value_1 String : 0
+         ALIAS __table2.value_1 :: 2 -> rhs.value_1 String : 1
+         ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2
 Positions: 4 0 2 1
   Join (JOIN FillRightFirst)
-  Header: id_0 UInt64
-          value_1_1 String
-          value_1_3 String
-          id_2 UInt64
+  Header: __table1.id UInt64
+          __table1.value_1 String
+          __table2.value_1 String
+          __table2.id UInt64
   Type: INNER
   Strictness: ALL
   Algorithm: HashJoin
-  Clauses: [(id_0) = (id_2)]
+  Clauses: [(__table1.id) = (__table2.id)]
     Expression ((JOIN actions + Change column names to column identifiers))
-    Header: id_0 UInt64
-            value_1_1 String
+    Header: __table1.id UInt64
+            __table1.value_1 String
     Actions: INPUT : 0 -> id UInt64 : 0
              INPUT : 1 -> value_1 String : 1
-             ALIAS id :: 0 -> id_0 UInt64 : 2
-             ALIAS value_1 :: 1 -> value_1_1 String : 0
+             ALIAS id :: 0 -> __table1.id UInt64 : 2
+             ALIAS value_1 :: 1 -> __table1.value_1 String : 0
     Positions: 2 0
       ReadFromMergeTree (default.test_table_1)
       Header: id UInt64
@@ -36,12 +36,12 @@ Positions: 4 0 2 1
       Parts: 1
       Granules: 1
     Expression ((JOIN actions + Change column names to column identifiers))
-    Header: id_2 UInt64
-            value_1_3 String
+    Header: __table2.id UInt64
+            __table2.value_1 String
     Actions: INPUT : 0 -> id UInt64 : 0
              INPUT : 1 -> value_1 String : 1
-             ALIAS id :: 0 -> id_2 UInt64 : 2
-             ALIAS value_1 :: 1 -> value_1_3 String : 0
+             ALIAS id :: 0 -> __table2.id UInt64 : 2
+             ALIAS value_1 :: 1 -> __table2.value_1 String : 0
     Positions: 2 0
       ReadFromMergeTree (default.test_table_2)
       Header: id UInt64
@@ -55,39 +55,39 @@ Header: id UInt64
         value_1 String
         rhs.id UInt64
         rhs.value_1 String
-Actions: INPUT : 0 -> id_0 UInt64 : 0
-         INPUT : 1 -> value_1_1 String : 1
-         INPUT :: 2 -> value_2_4 UInt64 : 2
-         INPUT : 3 -> value_1_3 String : 3
-         INPUT :: 4 -> value_2_5 UInt64 : 4
-         INPUT : 5 -> id_2 UInt64 : 5
-         ALIAS id_0 :: 0 -> id UInt64 : 6
-         ALIAS value_1_1 :: 1 -> value_1 String : 0
-         ALIAS value_1_3 :: 3 -> rhs.value_1 String : 1
-         ALIAS id_2 :: 5 -> rhs.id UInt64 : 3
+Actions: INPUT : 0 -> __table1.id UInt64 : 0
+         INPUT : 1 -> __table1.value_1 String : 1
+         INPUT :: 2 -> __table1.value_2 UInt64 : 2
+         INPUT : 3 -> __table2.value_1 String : 3
+         INPUT :: 4 -> __table2.value_2 UInt64 : 4
+         INPUT : 5 -> __table2.id UInt64 : 5
+         ALIAS __table1.id :: 0 -> id UInt64 : 6
+         ALIAS __table1.value_1 :: 1 -> value_1 String : 0
+         ALIAS __table2.value_1 :: 3 -> rhs.value_1 String : 1
+         ALIAS __table2.id :: 5 -> rhs.id UInt64 : 3
 Positions: 6 0 3 1
   Join (JOIN FillRightFirst)
-  Header: id_0 UInt64
-          value_1_1 String
-          value_2_4 UInt64
-          value_1_3 String
-          value_2_5 UInt64
-          id_2 UInt64
+  Header: __table1.id UInt64
+          __table1.value_1 String
+          __table1.value_2 UInt64
+          __table2.value_1 String
+          __table2.value_2 UInt64
+          __table2.id UInt64
   Type: INNER
   Strictness: ASOF
   Algorithm: HashJoin
   ASOF inequality: LESS
-  Clauses: [(id_0, value_2_4) = (id_2, value_2_5)]
+  Clauses: [(__table1.id, __table1.value_2) = (__table2.id, __table2.value_2)]
     Expression ((JOIN actions + Change column names to column identifiers))
-    Header: id_0 UInt64
-            value_1_1 String
-            value_2_4 UInt64
+    Header: __table1.id UInt64
+            __table1.value_1 String
+            __table1.value_2 UInt64
     Actions: INPUT : 0 -> id UInt64 : 0
              INPUT : 1 -> value_1 String : 1
              INPUT : 2 -> value_2 UInt64 : 2
-             ALIAS id :: 0 -> id_0 UInt64 : 3
-             ALIAS value_1 :: 1 -> value_1_1 String : 0
-             ALIAS value_2 :: 2 -> value_2_4 UInt64 : 1
+             ALIAS id :: 0 -> __table1.id UInt64 : 3
+             ALIAS value_1 :: 1 -> __table1.value_1 String : 0
+             ALIAS value_2 :: 2 -> __table1.value_2 UInt64 : 1
     Positions: 3 0 1
       ReadFromMergeTree (default.test_table_1)
       Header: id UInt64
@@ -97,15 +97,15 @@ Positions: 6 0 3 1
       Parts: 1
       Granules: 1
     Expression ((JOIN actions + Change column names to column identifiers))
-    Header: id_2 UInt64
-            value_1_3 String
-            value_2_5 UInt64
+    Header: __table2.id UInt64
+            __table2.value_1 String
+            __table2.value_2 UInt64
     Actions: INPUT : 0 -> id UInt64 : 0
              INPUT : 1 -> value_1 String : 1
              INPUT : 2 -> value_2 UInt64 : 2
-             ALIAS id :: 0 -> id_2 UInt64 : 3
-             ALIAS value_1 :: 1 -> value_1_3 String : 0
-             ALIAS value_2 :: 2 -> value_2_5 UInt64 : 1
+             ALIAS id :: 0 -> __table2.id UInt64 : 3
+             ALIAS value_1 :: 1 -> __table2.value_1 String : 0
+             ALIAS value_2 :: 2 -> __table2.value_2 UInt64 : 1
     Positions: 3 0 1
       ReadFromMergeTree (default.test_table_2)
       Header: id UInt64

From e936464dc6e7767a6e39e7b2f4f1f8144a5aa165 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Thu, 24 Aug 2023 23:25:05 +0000
Subject: [PATCH 006/204] Update more reference files

---
 .../01561_clickhouse_client_stage.reference   |  2 +-
 .../01591_window_functions.reference          | 14 +++++-----
 .../0_stateless/01823_explain_json.reference  | 26 +++++++++----------
 .../02048_clickhouse_local_stage.reference    |  2 +-
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/tests/queries/0_stateless/01561_clickhouse_client_stage.reference b/tests/queries/0_stateless/01561_clickhouse_client_stage.reference
index 8a34751b071..2631199cbab 100644
--- a/tests/queries/0_stateless/01561_clickhouse_client_stage.reference
+++ b/tests/queries/0_stateless/01561_clickhouse_client_stage.reference
@@ -2,7 +2,7 @@ execute: --allow_experimental_analyzer=1
 "foo"
 1
 execute: --allow_experimental_analyzer=1 --stage fetch_columns
-"dummy_0"
+"__table1.dummy"
 0
 execute: --allow_experimental_analyzer=1 --stage with_mergeable_state
 "1_UInt8"
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index ce9c6f4589e..abb628af3b7 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -917,9 +917,9 @@ from
 ;
 Expression ((Project names + Projection))
   Window (Window step for window \'\')
-    Window (Window step for window \'PARTITION BY p_0\')
-      Window (Window step for window \'PARTITION BY p_0 ORDER BY o_1 ASC\')
-        Sorting (Sorting for window \'PARTITION BY p_0 ORDER BY o_1 ASC\')
+    Window (Window step for window \'PARTITION BY __table1.p\')
+      Window (Window step for window \'PARTITION BY __table1.p ORDER BY __table1.o ASC\')
+        Sorting (Sorting for window \'PARTITION BY __table1.p ORDER BY __table1.o ASC\')
           Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
             ReadFromStorage (SystemNumbers)
 explain select
@@ -930,11 +930,11 @@ from
         from numbers(16)) t
 ;
 Expression ((Project names + Projection))
-  Window (Window step for window \'ORDER BY o_0 ASC, number_1 ASC\')
-    Sorting (Sorting for window \'ORDER BY o_0 ASC, number_1 ASC\')
-      Window (Window step for window \'ORDER BY number_1 ASC\')
+  Window (Window step for window \'ORDER BY __table1.o ASC, __table1.number ASC\')
+    Sorting (Sorting for window \'ORDER BY __table1.o ASC, __table1.number ASC\')
+      Window (Window step for window \'ORDER BY __table1.number ASC\')
         Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))) [lifted up part])
-          Sorting (Sorting for window \'ORDER BY number_1 ASC\')
+          Sorting (Sorting for window \'ORDER BY __table1.number ASC\')
             Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
               ReadFromStorage (SystemNumbers)
 -- A test case for the sort comparator found by fuzzer.
diff --git a/tests/queries/0_stateless/01823_explain_json.reference b/tests/queries/0_stateless/01823_explain_json.reference
index befbf82f4fb..23fb34c2192 100644
--- a/tests/queries/0_stateless/01823_explain_json.reference
+++ b/tests/queries/0_stateless/01823_explain_json.reference
@@ -37,59 +37,59 @@
           "Node Type": "Aggregating",
           "Header": [
             {
-              "Name": "number_0",
+              "Name": "__table1.number",
               "Type": "UInt64"
             },
             {
-              "Name": "quantile(0.2_Float64)(number_0)",
+              "Name": "quantile(0.2_Float64)(__table1.number)",
               "Type": "Float64"
             },
             {
-              "Name": "sumIf(number_0, greater(number_0, 0_UInt8))",
+              "Name": "sumIf(__table1.number, greater(__table1.number, 0_UInt8))",
               "Type": "UInt64"
             }
           ],
-          "Keys": ["number_0"],
+          "Keys": ["__table1.number"],
           "Aggregates": [
             {
-              "Name": "quantile(0.2_Float64)(number_0)",
+              "Name": "quantile(0.2_Float64)(__table1.number)",
               "Function": {
                 "Name": "quantile",
                 "Parameters": ["0.2"],
                 "Argument Types": ["UInt64"],
                 "Result Type": "Float64"
               },
-              "Arguments": ["number_0"]
+              "Arguments": ["__table1.number"]
             },
             {
-              "Name": "sumIf(number_0, greater(number_0, 0_UInt8))",
+              "Name": "sumIf(__table1.number, greater(__table1.number, 0_UInt8))",
               "Function": {
                 "Name": "sumIf",
                 "Argument Types": ["UInt64", "UInt8"],
                 "Result Type": "UInt64"
               },
-              "Arguments": ["number_0", "greater(number_0, 0_UInt8)"]
+              "Arguments": ["__table1.number", "greater(__table1.number, 0_UInt8)"]
             }
           ],
 --------
           "Node Type": "ArrayJoin",
           "Left": false,
-          "Columns": ["x_0", "y_1"],
+          "Columns": ["__table1.x", "__table1.y"],
 --------
           "Node Type": "Distinct",
-          "Columns": ["intDiv(number_0, 2_UInt8)", "intDiv(number_0, 3_UInt8)"],
+          "Columns": ["intDiv(__table1.number, 2_UInt8)", "intDiv(__table1.number, 3_UInt8)"],
 --
               "Node Type": "Distinct",
-              "Columns": ["intDiv(number_0, 2_UInt8)", "intDiv(number_0, 3_UInt8)"],
+              "Columns": ["intDiv(__table1.number, 2_UInt8)", "intDiv(__table1.number, 3_UInt8)"],
 --------
               "Sort Description": [
                 {
-                  "Column": "number_0",
+                  "Column": "__table1.number",
                   "Ascending": false,
                   "With Fill": false
                 },
                 {
-                  "Column": "plus(number_0, 1_UInt8)",
+                  "Column": "plus(__table1.number, 1_UInt8)",
                   "Ascending": true,
                   "With Fill": false
                 }
diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference
index 8a34751b071..2631199cbab 100644
--- a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference
+++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference
@@ -2,7 +2,7 @@ execute: --allow_experimental_analyzer=1
 "foo"
 1
 execute: --allow_experimental_analyzer=1 --stage fetch_columns
-"dummy_0"
+"__table1.dummy"
 0
 execute: --allow_experimental_analyzer=1 --stage with_mergeable_state
 "1_UInt8"

From 96efe68e633a297a66c8ae1813d1141833bed025 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Fri, 25 Aug 2023 14:04:30 +0000
Subject: [PATCH 007/204] Update reference file

---
 .../02479_mysql_connect_to_self.reference            | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/02479_mysql_connect_to_self.reference b/tests/queries/0_stateless/02479_mysql_connect_to_self.reference
index f4dd01bc184..6838dacc3b3 100644
--- a/tests/queries/0_stateless/02479_mysql_connect_to_self.reference
+++ b/tests/queries/0_stateless/02479_mysql_connect_to_self.reference
@@ -50,7 +50,7 @@ QUERY id: 0
       COLUMN id: 5, column_name: b, result_type: String, source_id: 3
       COLUMN id: 6, column_name: c, result_type: String, source_id: 3
   JOIN TREE
-    TABLE_FUNCTION id: 3, table_function_name: mysql
+    TABLE_FUNCTION id: 3, alias: __table1, table_function_name: mysql
       ARGUMENTS
         LIST id: 7, nodes: 5
           CONSTANT id: 8, constant_value: \'127.0.0.1:9004\', constant_value_type: String
@@ -63,10 +63,10 @@ QUERY id: 0
       SETTINGS connection_wait_timeout=123 connect_timeout=40123002 read_write_timeout=40123001 connection_pool_size=3
 
 SELECT
-    key AS key,
-    a AS a,
-    b AS b,
-    c AS c
-FROM mysql(\'127.0.0.1:9004\', \'default\', foo, \'default\', \'\', SETTINGS connection_wait_timeout = 123, connect_timeout = 40123002, read_write_timeout = 40123001, connection_pool_size = 3)
+    __table1.key AS key,
+    __table1.a AS a,
+    __table1.b AS b,
+    __table1.c AS c
+FROM mysql(\'127.0.0.1:9004\', \'default\', foo, \'default\', \'\', SETTINGS connection_wait_timeout = 123, connect_timeout = 40123002, read_write_timeout = 40123001, connection_pool_size = 3) AS __table1
 ---
 5

From cfeba3f02eb528ef08c17032e9e2c6bae5bb542e Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Fri, 25 Aug 2023 18:01:25 +0000
Subject: [PATCH 008/204] Add a comment

---
 src/Analyzer/createUniqueTableAliases.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index 782f2f35749..4b7f5573451 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -26,6 +26,9 @@ public:
                 [[fallthrough]];
             case QueryTreeNodeType::UNION:
             {
+                /// Queries like `(SELECT 1) as t` have invalid syntax. To avoid creating such queries (e.g. in StorageDistributed)
+                /// we need to remove aliases for top level queries.
+                /// N.B. Subquery depth starts count from 1, so the following condition checks if it's a top level.
                 if (getSubqueryDepth() == 1)
                 {
                     node->removeAlias();

From 0a78dc4c574f89ba820df11e3d07172bc2766bb4 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Mon, 28 Aug 2023 16:44:25 +0000
Subject: [PATCH 009/204] Use scopes for table aliases creation

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp |  9 +++--
 src/Analyzer/createUniqueTableAliases.cpp | 41 ++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 1803cc901c5..6a9b4da4e16 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -1192,7 +1192,7 @@ private:
 
     static void mergeWindowWithParentWindow(const QueryTreeNodePtr & window_node, const QueryTreeNodePtr & parent_window_node, IdentifierResolveScope & scope);
 
-    static void replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope);
+    void replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope);
 
     static void convertLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope);
 
@@ -2132,7 +2132,12 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
                 scope.scope_node->formatASTForErrorMessage());
 
         --positional_argument_number;
-        *node_to_replace = projection_nodes[positional_argument_number];
+        *node_to_replace = projection_nodes[positional_argument_number]->clone();
+        if (auto it = resolved_expressions.find(projection_nodes[positional_argument_number]);
+            it != resolved_expressions.end())
+        {
+            resolved_expressions[*node_to_replace] = it->second;
+        }
     }
 }
 
diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index 4b7f5573451..a836380b5c1 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -2,6 +2,7 @@
 #include <Analyzer/createUniqueTableAliases.h>
 #include <Analyzer/InDepthQueryTreeVisitor.h>
 #include <Analyzer/IQueryTreeNode.h>
+#include "Common/logger_useful.h"
 
 namespace DB
 {
@@ -20,7 +21,8 @@ public:
 
     void enterImpl(QueryTreeNodePtr & node)
     {
-        switch (node->getNodeType())
+        auto node_type = node->getNodeType();
+        switch (node_type)
         {
             case QueryTreeNodeType::QUERY:
                 [[fallthrough]];
@@ -45,6 +47,7 @@ public:
                 auto & alias = table_expression_to_alias[node];
                 if (alias.empty())
                 {
+                    scope_to_nodes_with_aliases[scope_nodes_stack.back()].push_back(node);
                     alias = fmt::format("__table{}", table_expression_to_alias.size());
                     node->setAlias(alias);
                 }
@@ -53,8 +56,44 @@ public:
             default:
                 break;
         }
+
+        switch (node_type)
+        {
+            case QueryTreeNodeType::QUERY:
+                [[fallthrough]];
+            case QueryTreeNodeType::UNION:
+                [[fallthrough]];
+            case QueryTreeNodeType::LAMBDA:
+                scope_nodes_stack.push_back(node);
+                break;
+            default:
+                break;
+        }
     }
+
+    void leaveImpl(QueryTreeNodePtr & node)
+    {
+        if (scope_nodes_stack.back() == node)
+        {
+            if (auto it = scope_to_nodes_with_aliases.find(scope_nodes_stack.back());
+                it != scope_to_nodes_with_aliases.end())
+            {
+                for (const auto & node_with_alias : it->second)
+                {
+                    table_expression_to_alias.erase(node_with_alias);
+                }
+                scope_to_nodes_with_aliases.erase(it);
+            }
+            scope_nodes_stack.pop_back();
+        }
+    }
+
 private:
+    // Stack of nodes which create scopes: QUERY, UNION and LAMBDA.
+    QueryTreeNodes scope_nodes_stack;
+
+    std::unordered_map<QueryTreeNodePtr, QueryTreeNodes> scope_to_nodes_with_aliases;
+
     // We need to use raw pointer as a key, not a QueryTreeNodePtrWithHash.
     std::unordered_map<QueryTreeNodePtr, String> table_expression_to_alias;
 };

From ae88180556eb736fa1e17f1fa56aaa7666ee2f2e Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Mon, 28 Aug 2023 17:11:53 +0000
Subject: [PATCH 010/204] Fix index generation

---
 src/Analyzer/createUniqueTableAliases.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index a836380b5c1..437a8634992 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -48,7 +48,7 @@ public:
                 if (alias.empty())
                 {
                     scope_to_nodes_with_aliases[scope_nodes_stack.back()].push_back(node);
-                    alias = fmt::format("__table{}", table_expression_to_alias.size());
+                    alias = fmt::format("__table{}", ++next_id);
                     node->setAlias(alias);
                 }
                 break;
@@ -89,6 +89,8 @@ public:
     }
 
 private:
+    size_t next_id = 0;
+
     // Stack of nodes which create scopes: QUERY, UNION and LAMBDA.
     QueryTreeNodes scope_nodes_stack;
 

From 2a0d883c8b4a9fad44f678f789c9137d6ecf0761 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Fri, 1 Sep 2023 17:33:09 +0000
Subject: [PATCH 011/204] Fix crash

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp |  2 +-
 src/Analyzer/createUniqueTableAliases.cpp | 11 +++++++----
 src/Analyzer/createUniqueTableAliases.h   |  2 +-
 src/Storages/buildQueryTreeForShard.cpp   |  2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 6a9b4da4e16..3e922bb9ee7 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -7180,7 +7180,7 @@ void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context
 {
     QueryAnalyzer analyzer;
     analyzer.resolve(query_tree_node, table_expression, context);
-    createUniqueTableAliases(query_tree_node, context);
+    createUniqueTableAliases(query_tree_node, table_expression, context);
 }
 
 }
diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index 437a8634992..3e264553c59 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -15,9 +15,12 @@ class CreateUniqueTableAliasesVisitor : public InDepthQueryTreeVisitorWithContex
 public:
     using Base = InDepthQueryTreeVisitorWithContext<CreateUniqueTableAliasesVisitor>;
 
-    explicit CreateUniqueTableAliasesVisitor(const ContextPtr & context)
+    explicit CreateUniqueTableAliasesVisitor(const ContextPtr & context, const QueryTreeNodePtr & table_expression)
         : Base(context)
-    {}
+    {
+        if (table_expression)
+            scope_nodes_stack.push_back(table_expression);
+    }
 
     void enterImpl(QueryTreeNodePtr & node)
     {
@@ -103,9 +106,9 @@ private:
 }
 
 
-void createUniqueTableAliases(QueryTreeNodePtr & node, const ContextPtr & context)
+void createUniqueTableAliases(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, const ContextPtr & context)
 {
-    CreateUniqueTableAliasesVisitor(context).visit(node);
+    CreateUniqueTableAliasesVisitor(context, table_expression).visit(node);
 }
 
 }
diff --git a/src/Analyzer/createUniqueTableAliases.h b/src/Analyzer/createUniqueTableAliases.h
index 815fafaebca..d57a198498c 100644
--- a/src/Analyzer/createUniqueTableAliases.h
+++ b/src/Analyzer/createUniqueTableAliases.h
@@ -13,6 +13,6 @@ namespace DB
  * For each table expression in the Query Tree generate and add a unique alias.
  * If table expression had an alias in initial query tree, override it.
  */
-void createUniqueTableAliases(QueryTreeNodePtr & node, const ContextPtr & context);
+void createUniqueTableAliases(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, const ContextPtr & context);
 
 }
diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp
index 3bbb1a584ef..c8a4b99474f 100644
--- a/src/Storages/buildQueryTreeForShard.cpp
+++ b/src/Storages/buildQueryTreeForShard.cpp
@@ -373,7 +373,7 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeN
 
     removeGroupingFunctionSpecializations(query_tree_to_modify);
 
-    createUniqueTableAliases(query_tree_to_modify, planner_context->getQueryContext());
+    createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext());
 
     return query_tree_to_modify;
 }

From 1ba49d17dd2eba6031cda61ed1cf8a72b3f81519 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Tue, 21 Nov 2023 00:26:27 +0100
Subject: [PATCH 012/204] Fix crash when remote() argument is a table function

---
 src/Analyzer/createUniqueTableAliases.cpp     |  13 +-
 ...3_analyzer_push_any_to_functions.reference |   8 +-
 ...rder_by_read_in_order_query_plan.reference | 142 +++++++++---------
 3 files changed, 82 insertions(+), 81 deletions(-)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index 3e264553c59..b49b433bb91 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -1,8 +1,9 @@
+#include <memory>
 #include <unordered_map>
 #include <Analyzer/createUniqueTableAliases.h>
 #include <Analyzer/InDepthQueryTreeVisitor.h>
 #include <Analyzer/IQueryTreeNode.h>
-#include "Common/logger_useful.h"
+#include <Analyzer/LambdaNode.h>
 
 namespace DB
 {
@@ -15,11 +16,11 @@ class CreateUniqueTableAliasesVisitor : public InDepthQueryTreeVisitorWithContex
 public:
     using Base = InDepthQueryTreeVisitorWithContext<CreateUniqueTableAliasesVisitor>;
 
-    explicit CreateUniqueTableAliasesVisitor(const ContextPtr & context, const QueryTreeNodePtr & table_expression)
+    explicit CreateUniqueTableAliasesVisitor(const ContextPtr & context)
         : Base(context)
     {
-        if (table_expression)
-            scope_nodes_stack.push_back(table_expression);
+        // Insert a fake node on top of the stack.
+        scope_nodes_stack.push_back(std::make_shared<LambdaNode>(Names{}, nullptr));
     }
 
     void enterImpl(QueryTreeNodePtr & node)
@@ -106,9 +107,9 @@ private:
 }
 
 
-void createUniqueTableAliases(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, const ContextPtr & context)
+void createUniqueTableAliases(QueryTreeNodePtr & node, const QueryTreeNodePtr &  /*table_expression*/, const ContextPtr & context)
 {
-    CreateUniqueTableAliasesVisitor(context, table_expression).visit(node);
+    CreateUniqueTableAliasesVisitor(context).visit(node);
 }
 
 }
diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
index 025c04af1da..fc238759a61 100644
--- a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
+++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
@@ -22,7 +22,7 @@ QUERY id: 0
                         COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
                   CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 7, table_function_name: numbers
+    TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 13, nodes: 2
           CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8
@@ -51,7 +51,7 @@ QUERY id: 0
                         COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
                   CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 7, table_function_name: numbers
+    TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 13, nodes: 2
           CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8
@@ -73,7 +73,7 @@ QUERY id: 0
                   COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
             CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 7, table_function_name: numbers
+    TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 9, nodes: 2
           CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
@@ -104,7 +104,7 @@ QUERY id: 0
                   COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
             CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 7, table_function_name: numbers
+    TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers
       ARGUMENTS
         LIST id: 9, nodes: 2
           CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
diff --git a/tests/queries/0_stateless/02911_analyzer_order_by_read_in_order_query_plan.reference b/tests/queries/0_stateless/02911_analyzer_order_by_read_in_order_query_plan.reference
index 5dd0d0d1820..d8f2decba37 100644
--- a/tests/queries/0_stateless/02911_analyzer_order_by_read_in_order_query_plan.reference
+++ b/tests/queries/0_stateless/02911_analyzer_order_by_read_in_order_query_plan.reference
@@ -13,8 +13,8 @@ select * from tab order by (a + b) * c;
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
 select * from tab order by (a + b) * c desc;
 4	4	4	4
 4	4	4	4
@@ -27,8 +27,8 @@ select * from tab order by (a + b) * c desc;
 0	0	0	0
 0	0	0	0
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c desc) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) DESC
-  Result sort description: multiply(plus(a_0, b_1), c_2) DESC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC
 -- Exact match, full key
 select * from tab order by (a + b) * c, sin(a / b);
 0	0	0	0
@@ -42,8 +42,8 @@ select * from tab order by (a + b) * c, sin(a / b);
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c, sin(a / b)) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
 select * from tab order by (a + b) * c desc, sin(a / b) desc;
 4	4	4	4
 4	4	4	4
@@ -56,8 +56,8 @@ select * from tab order by (a + b) * c desc, sin(a / b) desc;
 0	0	0	0
 0	0	0	0
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c desc, sin(a / b) desc) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) DESC, sin(divide(a_0, b_1)) DESC
-  Result sort description: multiply(plus(a_0, b_1), c_2) DESC, sin(divide(a_0, b_1)) DESC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, sin(divide(__table1.a, __table1.b)) DESC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, sin(divide(__table1.a, __table1.b)) DESC
 -- Exact match, mixed direction
 select * from tab order by (a + b) * c desc, sin(a / b);
 4	4	4	4
@@ -71,8 +71,8 @@ select * from tab order by (a + b) * c desc, sin(a / b);
 0	0	0	0
 0	0	0	0
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c desc, sin(a / b)) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) DESC
-  Result sort description: multiply(plus(a_0, b_1), c_2) DESC, sin(divide(a_0, b_1)) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, sin(divide(__table1.a, __table1.b)) ASC
 select * from tab order by (a + b) * c, sin(a / b) desc;
 0	0	0	0
 0	0	0	0
@@ -85,8 +85,8 @@ select * from tab order by (a + b) * c, sin(a / b) desc;
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c, sin(a / b) desc) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) DESC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) DESC
 -- Wrong order, full sort
 select * from tab order by sin(a / b), (a + b) * c;
 1	1	1	1
@@ -100,32 +100,32 @@ select * from tab order by sin(a / b), (a + b) * c;
 0	0	0	0
 0	0	0	0
 select * from (explain plan actions = 1 select * from tab order by sin(a / b), (a + b) * c) where explain ilike '%sort description%';
-  Sort description: sin(divide(a_0, b_1)) ASC, multiply(plus(a_0, b_1), c_2) ASC
+  Sort description: sin(divide(__table1.a, __table1.b)) ASC, multiply(plus(__table1.a, __table1.b), __table1.c) ASC
 -- Fixed point
 select * from tab where (a + b) * c = 8 order by sin(a / b);
 2	2	2	2
 2	2	2	2
 select * from (explain plan actions = 1 select * from tab where (a + b) * c = 8 order by sin(a / b)) where explain ilike '%sort description%';
-  Prefix sort description: sin(divide(a_0, b_1)) ASC
-  Result sort description: sin(divide(a_0, b_1)) ASC
+  Prefix sort description: sin(divide(__table1.a, __table1.b)) ASC
+  Result sort description: sin(divide(__table1.a, __table1.b)) ASC
 select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c;
 1	1	1	1
 1	1	1	1
 select * from (explain plan actions = 1 select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c) where explain ilike '%sort description%';
-  Prefix sort description: multiply(plus(d_3, 1_UInt8), 4_UInt8) ASC, multiply(plus(a_0, b_1), c_2) ASC
-  Result sort description: multiply(plus(d_3, 1_UInt8), 4_UInt8) ASC, multiply(plus(a_0, b_1), c_2) ASC
+  Prefix sort description: multiply(plus(__table1.d, 1_UInt8), 4_UInt8) ASC, multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+  Result sort description: multiply(plus(__table1.d, 1_UInt8), 4_UInt8) ASC, multiply(plus(__table1.a, __table1.b), __table1.c) ASC
 select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b);
 2	2	2	2
 2	2	2	2
 select * from (explain plan actions = 1 select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b)) where explain ilike '%sort description%';
-  Prefix sort description: multiply(plus(d_3, 1_UInt8), 4_UInt8) ASC, sin(divide(a_0, b_1)) ASC
-  Result sort description: multiply(plus(d_3, 1_UInt8), 4_UInt8) ASC, sin(divide(a_0, b_1)) ASC
+  Prefix sort description: multiply(plus(__table1.d, 1_UInt8), 4_UInt8) ASC, sin(divide(__table1.a, __table1.b)) ASC
+  Result sort description: multiply(plus(__table1.d, 1_UInt8), 4_UInt8) ASC, sin(divide(__table1.a, __table1.b)) ASC
 -- Wrong order with fixed point
 select * from tab where (a + b) * c = 8 order by sin(b / a);
 2	2	2	2
 2	2	2	2
 select * from (explain plan actions = 1 select * from tab where (a + b) * c = 8 order by sin(b / a)) where explain ilike '%sort description%';
-  Sort description: sin(divide(b_1, a_0)) ASC
+  Sort description: sin(divide(__table1.b, __table1.a)) ASC
 -- Monotonicity
 select * from tab order by intDiv((a + b) * c, 2);
 0	0	0	0
@@ -139,8 +139,8 @@ select * from tab order by intDiv((a + b) * c, 2);
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from tab order by intDiv((a + b) * c, 2)) where explain like '%sort description%';
-  Prefix sort description: intDiv(multiply(plus(a_0, b_1), c_2), 2_UInt8) ASC
-  Result sort description: intDiv(multiply(plus(a_0, b_1), c_2), 2_UInt8) ASC
+  Prefix sort description: intDiv(multiply(plus(__table1.a, __table1.b), __table1.c), 2_UInt8) ASC
+  Result sort description: intDiv(multiply(plus(__table1.a, __table1.b), __table1.c), 2_UInt8) ASC
 select * from tab order by intDiv((a + b) * c, 2), sin(a / b);
 0	0	0	0
 0	0	0	0
@@ -153,36 +153,36 @@ select * from tab order by intDiv((a + b) * c, 2), sin(a / b);
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from tab order by intDiv((a + b) * c, 2), sin(a / b)) where explain like '%sort description%';
-  Prefix sort description: intDiv(multiply(plus(a_0, b_1), c_2), 2_UInt8) ASC
-  Result sort description: intDiv(multiply(plus(a_0, b_1), c_2), 2_UInt8) ASC, sin(divide(a_0, b_1)) ASC
+  Prefix sort description: intDiv(multiply(plus(__table1.a, __table1.b), __table1.c), 2_UInt8) ASC
+  Result sort description: intDiv(multiply(plus(__table1.a, __table1.b), __table1.c), 2_UInt8) ASC, sin(divide(__table1.a, __table1.b)) ASC
 -- select * from tab order by (a + b) * c, intDiv(sin(a / b), 2);
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c, intDiv(sin(a / b), 2)) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC, intDiv(sin(divide(a_0, b_1)), 2_UInt8) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC, intDiv(sin(divide(a_0, b_1)), 2_UInt8) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, intDiv(sin(divide(__table1.a, __table1.b)), 2_UInt8) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, intDiv(sin(divide(__table1.a, __table1.b)), 2_UInt8) ASC
 -- select * from tab order by (a + b) * c desc , intDiv(sin(a / b), 2);
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c desc , intDiv(sin(a / b), 2)) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) DESC
-  Result sort description: multiply(plus(a_0, b_1), c_2) DESC, intDiv(sin(divide(a_0, b_1)), 2_UInt8) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, intDiv(sin(divide(__table1.a, __table1.b)), 2_UInt8) ASC
 -- select * from tab order by (a + b) * c, intDiv(sin(a / b), 2) desc;
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c, intDiv(sin(a / b), 2) desc) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC, intDiv(sin(divide(a_0, b_1)), 2_UInt8) DESC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, intDiv(sin(divide(__table1.a, __table1.b)), 2_UInt8) DESC
 -- select * from tab order by (a + b) * c desc, intDiv(sin(a / b), 2) desc;
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c desc, intDiv(sin(a / b), 2) desc) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) DESC, intDiv(sin(divide(a_0, b_1)), 2_UInt8) DESC
-  Result sort description: multiply(plus(a_0, b_1), c_2) DESC, intDiv(sin(divide(a_0, b_1)), 2_UInt8) DESC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, intDiv(sin(divide(__table1.a, __table1.b)), 2_UInt8) DESC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, intDiv(sin(divide(__table1.a, __table1.b)), 2_UInt8) DESC
 -- select * from tab order by (a + b) * c desc, intDiv(sin(a / b), -2);
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c desc, intDiv(sin(a / b), -2)) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) DESC, intDiv(sin(divide(a_0, b_1)), -2_Int8) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) DESC, intDiv(sin(divide(a_0, b_1)), -2_Int8) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, intDiv(sin(divide(__table1.a, __table1.b)), -2_Int8) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, intDiv(sin(divide(__table1.a, __table1.b)), -2_Int8) ASC
 -- select * from tab order by (a + b) * c desc, intDiv(intDiv(sin(a / b), -2), -3);
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c desc, intDiv(intDiv(sin(a / b), -2), -3)) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) DESC
-  Result sort description: multiply(plus(a_0, b_1), c_2) DESC, intDiv(intDiv(sin(divide(a_0, b_1)), -2_Int8), -3_Int8) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) DESC, intDiv(intDiv(sin(divide(__table1.a, __table1.b)), -2_Int8), -3_Int8) ASC
 -- select * from tab order by (a + b) * c, intDiv(intDiv(sin(a / b), -2), -3);
 select * from (explain plan actions = 1 select * from tab order by (a + b) * c, intDiv(intDiv(sin(a / b), -2), -3)) where explain like '%sort description%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC, intDiv(intDiv(sin(divide(a_0, b_1)), -2_Int8), -3_Int8) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC, intDiv(intDiv(sin(divide(a_0, b_1)), -2_Int8), -3_Int8) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, intDiv(intDiv(sin(divide(__table1.a, __table1.b)), -2_Int8), -3_Int8) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, intDiv(intDiv(sin(divide(__table1.a, __table1.b)), -2_Int8), -3_Int8) ASC
 -- Aliases
 select * from (select *, a + b as x from tab) order by x * c;
 0	0	0	0	0
@@ -196,8 +196,8 @@ select * from (select *, a + b as x from tab) order by x * c;
 4	4	4	4	8
 4	4	4	4	8
 select * from (explain plan actions = 1 select * from (select *, a + b as x from tab) order by x * c) where explain like '%sort description%';
-  Prefix sort description: multiply(x_4, c_2) ASC
-  Result sort description: multiply(x_4, c_2) ASC
+  Prefix sort description: multiply(__table1.x, __table1.c) ASC
+  Result sort description: multiply(__table1.x, __table1.c) ASC
 select * from (select *, a + b as x, a / b as y from tab) order by x * c, sin(y);
 0	0	0	0	0	nan
 0	0	0	0	0	nan
@@ -210,8 +210,8 @@ select * from (select *, a + b as x, a / b as y from tab) order by x * c, sin(y)
 4	4	4	4	8	1
 4	4	4	4	8	1
 select * from (explain plan actions = 1 select * from (select *, a + b as x, a / b as y from tab) order by x * c, sin(y)) where explain like '%sort description%';
-  Prefix sort description: multiply(x_4, c_2) ASC, sin(y_5) ASC
-  Result sort description: multiply(x_4, c_2) ASC, sin(y_5) ASC
+  Prefix sort description: multiply(__table1.x, __table1.c) ASC, sin(__table1.y) ASC
+  Result sort description: multiply(__table1.x, __table1.c) ASC, sin(__table1.y) ASC
 select * from (select *, a / b as y from (select *, a + b as x from tab)) order by x * c, sin(y);
 0	0	0	0	0	nan
 0	0	0	0	0	nan
@@ -224,8 +224,8 @@ select * from (select *, a / b as y from (select *, a + b as x from tab)) order
 4	4	4	4	8	1
 4	4	4	4	8	1
 select * from (explain plan actions = 1 select * from (select *, a / b as y from (select *, a + b as x from tab)) order by x * c, sin(y)) where explain like '%sort description%';
-  Prefix sort description: multiply(x_4, c_2) ASC, sin(y_5) ASC
-  Result sort description: multiply(x_4, c_2) ASC, sin(y_5) ASC
+  Prefix sort description: multiply(__table1.x, __table1.c) ASC, sin(__table1.y) ASC
+  Result sort description: multiply(__table1.x, __table1.c) ASC, sin(__table1.y) ASC
 -- { echoOn }
 
 select * from tab2 order by toTimeZone(toTimezone(x, 'UTC'), 'CET'), intDiv(intDiv(y, -2), -3);
@@ -238,8 +238,8 @@ select * from tab2 order by toTimeZone(toTimezone(x, 'UTC'), 'CET'), intDiv(intD
 2020-02-05 00:00:00	3	3
 2020-02-05 00:00:00	3	3
 select * from (explain plan actions = 1 select * from tab2 order by toTimeZone(toTimezone(x, 'UTC'), 'CET'), intDiv(intDiv(y, -2), -3)) where explain like '%sort description%';
-  Prefix sort description: toTimezone(toTimezone(x_0, \'UTC\'_String), \'CET\'_String) ASC, intDiv(intDiv(y_1, -2_Int8), -3_Int8) ASC
-  Result sort description: toTimezone(toTimezone(x_0, \'UTC\'_String), \'CET\'_String) ASC, intDiv(intDiv(y_1, -2_Int8), -3_Int8) ASC
+  Prefix sort description: toTimezone(toTimezone(__table1.x, \'UTC\'_String), \'CET\'_String) ASC, intDiv(intDiv(__table1.y, -2_Int8), -3_Int8) ASC
+  Result sort description: toTimezone(toTimezone(__table1.x, \'UTC\'_String), \'CET\'_String) ASC, intDiv(intDiv(__table1.y, -2_Int8), -3_Int8) ASC
 select * from tab2 order by toStartOfDay(x), intDiv(intDiv(y, -2), -3);
 2020-02-02 00:00:00	0	0
 2020-02-02 00:00:00	0	0
@@ -250,12 +250,12 @@ select * from tab2 order by toStartOfDay(x), intDiv(intDiv(y, -2), -3);
 2020-02-05 00:00:00	3	3
 2020-02-05 00:00:00	3	3
 select * from (explain plan actions = 1 select * from tab2 order by toStartOfDay(x), intDiv(intDiv(y, -2), -3)) where explain like '%sort description%';
-  Prefix sort description: toStartOfDay(x_0) ASC
-  Result sort description: toStartOfDay(x_0) ASC, intDiv(intDiv(y_1, -2_Int8), -3_Int8) ASC
+  Prefix sort description: toStartOfDay(__table1.x) ASC
+  Result sort description: toStartOfDay(__table1.x) ASC, intDiv(intDiv(__table1.y, -2_Int8), -3_Int8) ASC
 -- select * from tab2 where toTimezone(x, 'CET') = '2020-02-03 01:00:00' order by intDiv(intDiv(y, -2), -3);
 select * from (explain plan actions = 1 select * from tab2 where toTimezone(x, 'CET') = '2020-02-03 01:00:00' order by intDiv(intDiv(y, -2), -3)) where explain like '%sort description%';
-  Prefix sort description: intDiv(intDiv(y_1, -2_Int8), -3_Int8) ASC
-  Result sort description: intDiv(intDiv(y_1, -2_Int8), -3_Int8) ASC
+  Prefix sort description: intDiv(intDiv(__table1.y, -2_Int8), -3_Int8) ASC
+  Result sort description: intDiv(intDiv(__table1.y, -2_Int8), -3_Int8) ASC
 -- { echoOn }
 
 -- Union (not fully supported)
@@ -281,8 +281,8 @@ select * from (select * from tab union all select * from tab3) order by (a + b)
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from (select * from tab union all select * from tab3) order by (a + b) * c, sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
         ReadType: InOrder
         ReadType: InOrder
 select * from (select * from tab where (a + b) * c = 8 union all select * from tab3 where (a + b) * c = 18) order by sin(a / b);
@@ -291,8 +291,8 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t
 3	3	3	3
 3	3	3	3
 select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab3 where (a + b) * c = 18) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
-  Prefix sort description: sin(divide(a_0, b_1)) ASC
-  Result sort description: sin(divide(a_0, b_1)) ASC
+  Prefix sort description: sin(divide(__table1.a, __table1.b)) ASC
+  Result sort description: sin(divide(__table1.a, __table1.b)) ASC
           ReadType: InOrder
           ReadType: InOrder
 select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b);
@@ -309,8 +309,8 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t
 0	0	0	0
 0	0	0	0
 select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
-  Prefix sort description: sin(divide(a_0, b_1)) ASC
-  Result sort description: sin(divide(a_0, b_1)) ASC
+  Prefix sort description: sin(divide(__table1.a, __table1.b)) ASC
+  Result sort description: sin(divide(__table1.a, __table1.b)) ASC
           ReadType: InOrder
         ReadType: InOrder
 select * from (select * from tab union all select * from tab5) order by (a + b) * c;
@@ -335,8 +335,8 @@ select * from (select * from tab union all select * from tab5) order by (a + b)
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from (select * from tab union all select * from tab5) order by (a + b) * c) where explain like '%sort description%' or explain like '%ReadType%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
         ReadType: InOrder
         ReadType: InOrder
 select * from (select * from tab union all select * from tab5) order by (a + b) * c, sin(a / b);
@@ -361,11 +361,11 @@ select * from (select * from tab union all select * from tab5) order by (a + b)
 4	4	4	4
 4	4	4	4
 select * from (explain plan actions = 1 select * from (select * from tab union all select * from tab5) order by (a + b) * c, sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
-  Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
-  Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
+  Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
+  Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
         ReadType: InOrder
-      Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC
-      Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
+      Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+      Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
           ReadType: InOrder
 -- Union with limit
 select * from (select * from tab union all select * from tab5) order by (a + b) * c, sin(a / b) limit 3;
@@ -375,12 +375,12 @@ select * from (select * from tab union all select * from tab5) order by (a + b)
 select * from (explain plan actions = 1 select * from (select * from tab union all select * from tab5) order by (a + b) * c, sin(a / b) limit 3) where explain ilike '%sort description%' or explain like '%ReadType%' or explain like '%Limit%';
   Limit (preliminary LIMIT (without OFFSET))
   Limit 3
-    Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
-    Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
+    Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
+    Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
     Limit 3
           ReadType: InOrder
-        Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC
-        Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
+        Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+        Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
             ReadType: InOrder
 -- In this example, we read-in-order from tab up to ((a + b) * c, sin(a / b)) and from tab5 up to ((a + b) * c).
 -- In case of tab5, there would be two finish sorting transforms: ((a + b) * c) -> ((a + b) * c, sin(a / b)) -> ((a + b) * c, sin(a / b), d).
@@ -393,14 +393,14 @@ select * from (select * from tab union all select * from tab5 union all select *
 select * from (explain plan actions = 1 select * from (select * from tab union all select * from tab5 union all select * from tab4) order by (a + b) * c, sin(a / b), d limit 3) where explain ilike '%sort description%' or explain like '%ReadType%' or explain like '%Limit%';
   Limit (preliminary LIMIT (without OFFSET))
   Limit 3
-    Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
-    Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC, d_3 ASC
+    Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
+    Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC, __table1.d ASC
     Limit 3
           ReadType: InOrder
-        Prefix sort description: multiply(plus(a_0, b_1), c_2) ASC
-        Result sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC
+        Prefix sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC
+        Result sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC
             ReadType: InOrder
-        Sort description: multiply(plus(a_0, b_1), c_2) ASC, sin(divide(a_0, b_1)) ASC, d_3 ASC
+        Sort description: multiply(plus(__table1.a, __table1.b), __table1.c) ASC, sin(divide(__table1.a, __table1.b)) ASC, __table1.d ASC
         Limit 3
             ReadType: Default
 drop table if exists tab;

From c1a58b0aca1d0a41d6912dd7ecac8d585c17cc09 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Tue, 21 Nov 2023 00:47:55 +0100
Subject: [PATCH 013/204] Update reference file

---
 ...8_distinct_to_count_optimization.reference | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/queries/0_stateless/02868_distinct_to_count_optimization.reference b/tests/queries/0_stateless/02868_distinct_to_count_optimization.reference
index a2c441fa460..c2075f72f33 100644
--- a/tests/queries/0_stateless/02868_distinct_to_count_optimization.reference
+++ b/tests/queries/0_stateless/02868_distinct_to_count_optimization.reference
@@ -15,14 +15,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, is_subquery: 1, is_distinct: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1, is_distinct: 1
       PROJECTION COLUMNS
         a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
   SETTINGS allow_experimental_analyzer=1
 2. test distinct with subquery alias
 3
@@ -41,14 +41,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, alias: t, is_subquery: 1, is_distinct: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1, is_distinct: 1
       PROJECTION COLUMNS
         a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
   SETTINGS allow_experimental_analyzer=1
 3. test distinct with compound column name
 3
@@ -67,14 +67,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, alias: t, is_subquery: 1, is_distinct: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1, is_distinct: 1
       PROJECTION COLUMNS
         a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
   SETTINGS allow_experimental_analyzer=1
 4. test distinct with select expression alias
 3
@@ -93,14 +93,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, alias: t, is_subquery: 1, is_distinct: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1, is_distinct: 1
       PROJECTION COLUMNS
         alias_of_a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
   SETTINGS allow_experimental_analyzer=1
 5. test simple group by
 3
@@ -122,14 +122,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
       GROUP BY
         LIST id: 7, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
@@ -154,14 +154,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, alias: t, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
       GROUP BY
         LIST id: 7, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
@@ -186,14 +186,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, alias: t, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         alias_of_a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
       GROUP BY
         LIST id: 7, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
@@ -218,14 +218,14 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64
   JOIN TREE
-    QUERY id: 3, alias: t, is_subquery: 1
+    QUERY id: 3, alias: __table1, is_subquery: 1
       PROJECTION COLUMNS
         alias_of_a UInt8
       PROJECTION
         LIST id: 4, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6
       JOIN TREE
-        TABLE id: 6, table_name: default.test_rewrite_uniq_to_count
+        TABLE id: 6, alias: __table2, table_name: default.test_rewrite_uniq_to_count
       GROUP BY
         LIST id: 7, nodes: 1
           COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6

From 174607c6bf20c800298595de82d24ccb0b894197 Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Mon, 27 Nov 2023 15:10:07 +0100
Subject: [PATCH 014/204] Fixed potential exception due to stale profile UUID

`SettingsProfilesInfo::profiles` is not updated in the background when any of the profiles assigned to the user changes, but `SettingsProfilesInfo::profiles_with_implicit` is.

Update of #42641
kudos @tavplubix  https://github.com/ClickHouse/ClickHouse/pull/42641/files/3d0c07ac5b8f18917f2314474030910176ec7940#r1406196201
---
 src/Access/SettingsProfilesInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp
index d8b52ecf5e4..ae72cd52f2c 100644
--- a/src/Access/SettingsProfilesInfo.cpp
+++ b/src/Access/SettingsProfilesInfo.cpp
@@ -66,7 +66,7 @@ Strings SettingsProfilesInfo::getProfileNames() const
 {
     Strings result;
     result.reserve(profiles.size());
-    for (const auto & profile_id : profiles)
+    for (const auto & profile_id : profiles_with_implicit)
     {
         const auto p = names_of_profiles.find(profile_id);
         if (p != names_of_profiles.end())

From 7411fcc907c65513dcb3895aa339007d3ac36aed Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Tue, 28 Nov 2023 12:12:23 +0100
Subject: [PATCH 015/204] Filtering of profile UUIDs for SettingsProfilesInfo

---
 src/Access/SettingsProfilesCache.cpp | 12 ++++++++++--
 src/Access/SettingsProfilesCache.h   |  6 +++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp
index f03e68ba455..11cb3a79926 100644
--- a/src/Access/SettingsProfilesCache.cpp
+++ b/src/Access/SettingsProfilesCache.cpp
@@ -4,6 +4,8 @@
 #include <Access/SettingsProfilesInfo.h>
 #include <Common/quoteString.h>
 
+#include <boost/range/algorithm_ext/erase.hpp>
+
 
 namespace DB
 {
@@ -141,7 +143,7 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
     auto info = std::make_shared<SettingsProfilesInfo>(access_control);
 
     info->profiles = merged_settings.toProfileIDs();
-    substituteProfiles(merged_settings, info->profiles_with_implicit, info->names_of_profiles);
+    substituteProfiles(merged_settings, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
 
     info->settings = merged_settings.toSettingsChanges();
     info->constraints = merged_settings.toSettingsConstraints(access_control);
@@ -152,6 +154,7 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
 
 void SettingsProfilesCache::substituteProfiles(
     SettingsProfileElements & elements,
+    std::vector<UUID> & profiles,
     std::vector<UUID> & substituted_profiles,
     std::unordered_map<UUID, String> & names_of_substituted_profiles) const
 {
@@ -184,6 +187,11 @@ void SettingsProfilesCache::substituteProfiles(
         names_of_substituted_profiles.emplace(profile_id, profile->getName());
     }
     std::reverse(substituted_profiles.begin(), substituted_profiles.end());
+
+    boost::range::remove_erase_if(profiles, [&substituted_profiles_set](const UUID & profile_id)
+    {
+        return !substituted_profiles_set.contains(profile_id);
+    });
 }
 
 std::shared_ptr<const EnabledSettings> SettingsProfilesCache::getEnabledSettings(
@@ -231,7 +239,7 @@ std::shared_ptr<const SettingsProfilesInfo> SettingsProfilesCache::getSettingsPr
 
     info->profiles.push_back(profile_id);
     info->profiles_with_implicit.push_back(profile_id);
-    substituteProfiles(elements, info->profiles_with_implicit, info->names_of_profiles);
+    substituteProfiles(elements, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
     info->settings = elements.toSettingsChanges();
     info->constraints.merge(elements.toSettingsConstraints(access_control));
 
diff --git a/src/Access/SettingsProfilesCache.h b/src/Access/SettingsProfilesCache.h
index 28914596ccc..afc3c3e13a5 100644
--- a/src/Access/SettingsProfilesCache.h
+++ b/src/Access/SettingsProfilesCache.h
@@ -37,7 +37,11 @@ private:
     void profileRemoved(const UUID & profile_id);
     void mergeSettingsAndConstraints();
     void mergeSettingsAndConstraintsFor(EnabledSettings & enabled) const;
-    void substituteProfiles(SettingsProfileElements & elements, std::vector<UUID> & substituted_profiles, std::unordered_map<UUID, String> & names_of_substituted_profiles) const;
+
+    void substituteProfiles(SettingsProfileElements & elements,
+        std::vector<UUID> & profiles,
+        std::vector<UUID> & substituted_profiles,
+        std::unordered_map<UUID, String> & names_of_substituted_profiles) const;
 
     const AccessControl & access_control;
     std::unordered_map<UUID, SettingsProfilePtr> all_profiles;

From 6aaf1565e1c3d52d2867e375f52950e2cdbaa504 Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Tue, 28 Nov 2023 12:14:01 +0100
Subject: [PATCH 016/204] Using profiles instead of profiles_with_implicit for
 getProfileNames()

---
 src/Access/SettingsProfilesInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp
index ae72cd52f2c..d8b52ecf5e4 100644
--- a/src/Access/SettingsProfilesInfo.cpp
+++ b/src/Access/SettingsProfilesInfo.cpp
@@ -66,7 +66,7 @@ Strings SettingsProfilesInfo::getProfileNames() const
 {
     Strings result;
     result.reserve(profiles.size());
-    for (const auto & profile_id : profiles_with_implicit)
+    for (const auto & profile_id : profiles)
     {
         const auto p = names_of_profiles.find(profile_id);
         if (p != names_of_profiles.end())

From b634e043d5e64073961bd9cd337f4dbef6615657 Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Tue, 28 Nov 2023 14:32:45 +0100
Subject: [PATCH 017/204] std::erase_if instead of
 boost::range::remove_erase_if

---
 src/Access/SettingsProfilesCache.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp
index 11cb3a79926..9f4fc5a5d89 100644
--- a/src/Access/SettingsProfilesCache.cpp
+++ b/src/Access/SettingsProfilesCache.cpp
@@ -4,8 +4,6 @@
 #include <Access/SettingsProfilesInfo.h>
 #include <Common/quoteString.h>
 
-#include <boost/range/algorithm_ext/erase.hpp>
-
 
 namespace DB
 {
@@ -188,7 +186,7 @@ void SettingsProfilesCache::substituteProfiles(
     }
     std::reverse(substituted_profiles.begin(), substituted_profiles.end());
 
-    boost::range::remove_erase_if(profiles, [&substituted_profiles_set](const UUID & profile_id)
+    std::erase_if(profiles, [&substituted_profiles_set](const UUID & profile_id)
     {
         return !substituted_profiles_set.contains(profile_id);
     });

From 995b51ef736c20d9490f75cebaa4f42291df40cf Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Thu, 7 Dec 2023 03:41:32 +0000
Subject: [PATCH 018/204] Allow avoiding resolving hostnames in DDLWorker

---
 programs/server/config.xml                    |  3 +
 src/Interpreters/DDLTask.cpp                  | 18 ++++-
 src/Interpreters/DDLTask.h                    |  2 +-
 src/Interpreters/DDLWorker.cpp                |  5 +-
 src/Interpreters/DDLWorker.h                  |  2 +
 .../test_ddl_config_hostname/__init__.py      |  0
 .../configs/remote_servers.xml                | 19 +++++
 .../test_ddl_config_hostname/test.py          | 80 +++++++++++++++++++
 8 files changed, 125 insertions(+), 4 deletions(-)
 create mode 100644 tests/integration/test_ddl_config_hostname/__init__.py
 create mode 100644 tests/integration/test_ddl_config_hostname/configs/remote_servers.xml
 create mode 100644 tests/integration/test_ddl_config_hostname/test.py

diff --git a/programs/server/config.xml b/programs/server/config.xml
index e333082d099..688f0bf5645 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1380,6 +1380,9 @@
 
         <!-- Controls how many tasks could be in the queue -->
         <!-- <max_tasks_in_queue>1000</max_tasks_in_queue> -->
+
+        <!-- Host name of the current node. If specified, will only compare and not resolve hostnames inside the DDL tasks -->
+        <!-- <host_name>replica</host_name> -->
     </distributed_ddl>
 
     <!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 6e9155ab2a2..172d68f2941 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -215,14 +215,28 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z
 }
 
 
-bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper)
+bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper, const std::optional<std::string> & config_host_name)
 {
     bool host_in_hostlist = false;
     std::exception_ptr first_exception = nullptr;
 
+    auto maybe_secure_port = global_context->getTCPPortSecure();
+
     for (const HostID & host : entry.hosts)
     {
-        auto maybe_secure_port = global_context->getTCPPortSecure();
+        if (config_host_name)
+        {
+            if (host.host_name != *config_host_name)
+                continue;
+
+            if (!(maybe_secure_port && maybe_secure_port == host.port) && !(global_context->getTCPPort() == host.port))
+                continue;
+
+            host_in_hostlist = true;
+            host_id = host;
+            host_id_str = host.toString();
+            break;
+        }
 
         try
         {
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 1ceb74c7048..e1a81ac97af 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -143,7 +143,7 @@ struct DDLTask : public DDLTaskBase
 {
     DDLTask(const String & name, const String & path) : DDLTaskBase(name, path) {}
 
-    bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper);
+    bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper, const std::optional<std::string> & config_host_name);
 
     void setClusterInfo(ContextPtr context, Poco::Logger * log);
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 30cf6fd0568..de24dea1857 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -107,6 +107,9 @@ DDLWorker::DDLWorker(
         cleanup_delay_period = config->getUInt64(prefix + ".cleanup_delay_period", static_cast<UInt64>(cleanup_delay_period));
         max_tasks_in_queue = std::max<UInt64>(1, config->getUInt64(prefix + ".max_tasks_in_queue", max_tasks_in_queue));
 
+        if (config->has(prefix + ".host_name"))
+            config_host_name = config->getString(prefix + ".host_name");
+
         if (config->has(prefix + ".profile"))
             context->setSetting("profile", config->getString(prefix + ".profile"));
     }
@@ -214,7 +217,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
     /// Stage 2: resolve host_id and check if we should execute query or not
     /// Multiple clusters can use single DDL queue path in ZooKeeper,
     /// So we should skip task if we cannot find current host in cluster hosts list.
-    if (!task->findCurrentHostID(context, log, zookeeper))
+    if (!task->findCurrentHostID(context, log, zookeeper, config_host_name))
     {
         out_reason = "There is no a local address in host list";
         return add_to_skip_set();
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index d34a4135199..adc9a491d81 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -153,6 +153,8 @@ protected:
     ContextMutablePtr context;
     Poco::Logger * log;
 
+    std::optional<std::string> config_host_name; /// host_name from config
+
     std::string host_fqdn;      /// current host domain name
     std::string host_fqdn_id;   /// host_name:port
     std::string queue_dir;      /// dir with queue of queries
diff --git a/tests/integration/test_ddl_config_hostname/__init__.py b/tests/integration/test_ddl_config_hostname/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_ddl_config_hostname/configs/remote_servers.xml b/tests/integration/test_ddl_config_hostname/configs/remote_servers.xml
new file mode 100644
index 00000000000..8c6a507951d
--- /dev/null
+++ b/tests/integration/test_ddl_config_hostname/configs/remote_servers.xml
@@ -0,0 +1,19 @@
+<clickhouse>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+
+    <allow_zookeeper_write>1</allow_zookeeper_write>
+
+    <distributed_ddl>
+        <host_name>node1</host_name>
+    </distributed_ddl>
+</clickhouse>
diff --git a/tests/integration/test_ddl_config_hostname/test.py b/tests/integration/test_ddl_config_hostname/test.py
new file mode 100644
index 00000000000..f6cb5f5c38e
--- /dev/null
+++ b/tests/integration/test_ddl_config_hostname/test.py
@@ -0,0 +1,80 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance(
+    "node1",
+    main_configs=["configs/remote_servers.xml"],
+    with_zookeeper=True,
+    stay_alive=True,
+)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_ddl_queue_delete_add_replica(started_cluster):
+    #  Some query started on the cluster, then we deleted some unfinished node
+    #  and added a new node to the cluster. Considering that there are less
+    #  finished nodes than expected and we can't resolve deleted node's hostname
+    #  the queue will be stuck on a new node.
+    #  <host_name> inside <distributed_ddl> allows us to simply discard deleted
+    #  node's hostname by simple comparison without trying to resolve it.
+
+    node1.query(
+        "create table hostname_change on cluster test_cluster (n int) engine=Log"
+    )
+
+    # There's no easy way to change hostname of a container, so let's update values in zk
+    query_znode = node1.query(
+        "select max(name) from system.zookeeper where path='/clickhouse/task_queue/ddl'"
+    )[:-1]
+
+    value = (
+        node1.query(
+            "select value from system.zookeeper where path='/clickhouse/task_queue/ddl' and name='{}' format TSVRaw".format(
+                query_znode
+            )
+        )[:-1]
+        .replace("hosts: ['node1:9000']", "hosts: ['finished_node:9000','deleted_node:9000']")
+        .replace("initiator: node1:9000", "initiator: finished_node:9000")
+        .replace("\\'", "#")
+        .replace("'", "\\'")
+        .replace("\n", "\\n")
+        .replace("#", "\\'")
+    )
+
+    finished_znode = node1.query(
+        "select name from system.zookeeper where path='/clickhouse/task_queue/ddl/{}/finished' and name like '%node1%'".format(
+            query_znode
+        )
+    )[:-1]
+
+    node1.query(
+        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl', '{}')".format(
+            query_znode, value
+        )
+    )
+    started_cluster.get_kazoo_client("zoo1").delete(
+        "/clickhouse/task_queue/ddl/{}/finished/{}".format(query_znode, finished_znode)
+    )
+
+    node1.query(
+        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl/{}/finished', '0\\n')".format(
+            finished_znode.replace("node1", "finished_node"), query_znode
+        )
+    )
+
+    node1.restart_clickhouse(kill=True)
+
+    node1.query(
+        "create table hostname_change2 on cluster test_cluster (n int) engine=Log"
+    )

From 10bfd054d82bd70d0a931ad71c6031a54306740c Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Fri, 8 Dec 2023 19:40:09 +0000
Subject: [PATCH 019/204] Fix style & review

---
 src/Interpreters/DDLTask.cpp                  |  4 ++--
 .../test_ddl_config_hostname/test.py          | 24 ++++++++-----------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 172d68f2941..0164f5668a2 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -226,10 +226,10 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
     {
         if (config_host_name)
         {
-            if (host.host_name != *config_host_name)
+            if (config_host_name != host.host_name)
                 continue;
 
-            if (!(maybe_secure_port && maybe_secure_port == host.port) && !(global_context->getTCPPort() == host.port))
+            if (maybe_secure_port != host.port && global_context->getTCPPort() != host.port)
                 continue;
 
             host_in_hostlist = true;
diff --git a/tests/integration/test_ddl_config_hostname/test.py b/tests/integration/test_ddl_config_hostname/test.py
index f6cb5f5c38e..724e766c9dc 100644
--- a/tests/integration/test_ddl_config_hostname/test.py
+++ b/tests/integration/test_ddl_config_hostname/test.py
@@ -40,11 +40,11 @@ def test_ddl_queue_delete_add_replica(started_cluster):
 
     value = (
         node1.query(
-            "select value from system.zookeeper where path='/clickhouse/task_queue/ddl' and name='{}' format TSVRaw".format(
-                query_znode
-            )
+            f"select value from system.zookeeper where path='/clickhouse/task_queue/ddl' and name='{query_znode}' format TSVRaw"
         )[:-1]
-        .replace("hosts: ['node1:9000']", "hosts: ['finished_node:9000','deleted_node:9000']")
+        .replace(
+            "hosts: ['node1:9000']", "hosts: ['finished_node:9000','deleted_node:9000']"
+        )
         .replace("initiator: node1:9000", "initiator: finished_node:9000")
         .replace("\\'", "#")
         .replace("'", "\\'")
@@ -53,24 +53,20 @@ def test_ddl_queue_delete_add_replica(started_cluster):
     )
 
     finished_znode = node1.query(
-        "select name from system.zookeeper where path='/clickhouse/task_queue/ddl/{}/finished' and name like '%node1%'".format(
-            query_znode
-        )
+        f"select name from system.zookeeper where path='/clickhouse/task_queue/ddl/{query_znode}/finished' and name like '%node1%'"
     )[:-1]
 
     node1.query(
-        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl', '{}')".format(
-            query_znode, value
-        )
+        f"insert into system.zookeeper (name, path, value) values ('{query_znode}', '/clickhouse/task_queue/ddl', '{value}')"
     )
     started_cluster.get_kazoo_client("zoo1").delete(
-        "/clickhouse/task_queue/ddl/{}/finished/{}".format(query_znode, finished_znode)
+        f"/clickhouse/task_queue/ddl/{query_znode}/finished/{finished_znode}"
     )
 
+    finished_znode = finished_znode.replace("node1", "finished_node")
+
     node1.query(
-        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl/{}/finished', '0\\n')".format(
-            finished_znode.replace("node1", "finished_node"), query_znode
-        )
+        f"insert into system.zookeeper (name, path, value) values ('{finished_znode}', '/clickhouse/task_queue/ddl/{query_znode}/finished', '0\\n')"
     )
 
     node1.restart_clickhouse(kill=True)

From eb3862100235ffde69b2aae454b9b398f37d0a18 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 18:28:30 +0100
Subject: [PATCH 020/204] Print pretty type names by default

---
 src/Core/Settings.h               | 2 +-
 src/Core/SettingsChangesHistory.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9601cd3e398..08add1af7c5 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -832,7 +832,7 @@ class IColumn;
     M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
     M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \
     M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \
-    M(Bool, print_pretty_type_names, false, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \
+    M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \
     M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \
     M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0)\
 
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index 54c28fb9f92..8102ca818b2 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -81,7 +81,8 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
-    {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}}},
+    {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
+              {"print_pretty_type_names", false, true, "Better user experience."}}},
     {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},
               {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"},
               {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"},

From 12f92c9bebba30193b4ba0faa11f7b6ddc9825b3 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 12 Dec 2023 18:11:53 +0000
Subject: [PATCH 021/204] Try to analyze join expression based on column
 source.

---
 src/Planner/PlannerJoinTree.cpp |  12 ++
 src/Planner/PlannerJoins.cpp    | 291 +++++++++++++++++++++-----------
 src/Planner/PlannerJoins.h      |   2 +-
 3 files changed, 207 insertions(+), 98 deletions(-)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 7c1e6ded1e0..a075a19794e 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -979,6 +979,18 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
     auto right_plan = std::move(right_join_tree_query_plan.query_plan);
     auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
 
+    // {
+    //     WriteBufferFromOwnString buf;
+    //     left_plan.explainPlan(buf, {.header = true, .actions = true});
+    //     std::cerr << "left plan \n "<< buf.str() << std::endl;
+    // }
+
+    // {
+    //     WriteBufferFromOwnString buf;
+    //     right_plan.explainPlan(buf, {.header = true, .actions = true});
+    //     std::cerr << "right plan \n "<< buf.str() << std::endl;
+    // }
+
     JoinClausesAndActions join_clauses_and_actions;
     JoinKind join_kind = join_node.getKind();
     JoinStrictness join_strictness = join_node.getStrictness();
diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index 5f53c8e1fce..32102b2299a 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -20,6 +20,7 @@
 
 #include <Analyzer/Utils.h>
 #include <Analyzer/FunctionNode.h>
+#include <Analyzer/ColumnNode.h>
 #include <Analyzer/ConstantNode.h>
 #include <Analyzer/TableNode.h>
 #include <Analyzer/TableFunctionNode.h>
@@ -112,41 +113,85 @@ String JoinClause::dump() const
 namespace
 {
 
-std::optional<JoinTableSide> extractJoinTableSideFromExpression(const ActionsDAG::Node * expression_root_node,
-    const std::unordered_set<const ActionsDAG::Node *> & join_expression_dag_input_nodes,
-    const NameSet & left_table_expression_columns_names,
-    const NameSet & right_table_expression_columns_names,
+std::optional<JoinTableSide> extractJoinTableSideFromExpression(//const ActionsDAG::Node * expression_root_node,
+    const IQueryTreeNode * expression_root_node,
+    //const std::unordered_set<const ActionsDAG::Node *> & join_expression_dag_input_nodes,
+    // const NameSet & left_table_expression_columns_names,
+    // const NameSet & right_table_expression_columns_names,
     const JoinNode & join_node)
 {
     std::optional<JoinTableSide> table_side;
-    std::vector<const ActionsDAG::Node *> nodes_to_process;
+    std::vector<const IQueryTreeNode *> nodes_to_process;
     nodes_to_process.push_back(expression_root_node);
 
+    // std::cerr << "==== extractJoinTableSideFromExpression\n";
+    // std::cerr << "inp nodes" << std::endl;
+    // for (const auto * node : join_expression_dag_input_nodes)
+    //     std::cerr << reinterpret_cast<const void *>(node) << ' ' << node->result_name << std::endl;
+
+
+    // std::cerr << "l names" << std::endl;
+    // for (const auto & l : left_table_expression_columns_names)
+    //     std::cerr << l << std::endl;
+
+    // std::cerr << "r names" << std::endl;
+    // for (const auto & r : right_table_expression_columns_names)
+    //     std::cerr << r << std::endl;
+
+    const auto * left_table_expr = join_node.getLeftTableExpression().get();
+    const auto * right_table_expr = join_node.getRightTableExpression().get();
+
     while (!nodes_to_process.empty())
     {
         const auto * node_to_process = nodes_to_process.back();
         nodes_to_process.pop_back();
 
-        for (const auto & child : node_to_process->children)
-            nodes_to_process.push_back(child);
+        //std::cerr << "... " << reinterpret_cast<const void *>(node_to_process) << ' ' << node_to_process->result_name << std::endl;
 
-        if (!join_expression_dag_input_nodes.contains(node_to_process))
+        if (const auto * function_node = node_to_process->as<FunctionNode>())
+        {
+            for (const auto & child : function_node->getArguments())
+                nodes_to_process.push_back(child.get());
+
+            continue;
+        }
+
+        const auto * column_node = node_to_process->as<ColumnNode>();
+        if (!column_node)
             continue;
 
-        const auto & input_name = node_to_process->result_name;
+        // if (!join_expression_dag_input_nodes.contains(node_to_process))
+        //     continue;
 
-        bool left_table_expression_contains_input = left_table_expression_columns_names.contains(input_name);
-        bool right_table_expression_contains_input = right_table_expression_columns_names.contains(input_name);
+        const auto & input_name = column_node->getColumnName();
 
-        if (!left_table_expression_contains_input && !right_table_expression_contains_input)
+        // bool left_table_expression_contains_input = left_table_expression_columns_names.contains(input_name);
+        // bool right_table_expression_contains_input = right_table_expression_columns_names.contains(input_name);
+
+        // if (!left_table_expression_contains_input && !right_table_expression_contains_input)
+        //     throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
+        //         "JOIN {} actions has column {} that do not exist in left {} or right {} table expression columns",
+        //         join_node.formatASTForErrorMessage(),
+        //         input_name,
+        //         boost::join(left_table_expression_columns_names, ", "),
+        //         boost::join(right_table_expression_columns_names, ", "));
+
+        const auto * column_source = column_node->getColumnSource().get();
+        if (!column_source)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "No source for column {} in JOIN {}", input_name, join_node.formatASTForErrorMessage());
+
+        bool is_column_from_left_expr = column_source == left_table_expr;
+        bool is_column_from_right_expr = column_source == right_table_expr;
+
+        if (!is_column_from_left_expr && !is_column_from_right_expr)
             throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
                 "JOIN {} actions has column {} that do not exist in left {} or right {} table expression columns",
                 join_node.formatASTForErrorMessage(),
                 input_name,
-                boost::join(left_table_expression_columns_names, ", "),
-                boost::join(right_table_expression_columns_names, ", "));
+                left_table_expr->formatASTForErrorMessage(),
+                right_table_expr->formatASTForErrorMessage());
 
-        auto input_table_side = left_table_expression_contains_input ? JoinTableSide::Left : JoinTableSide::Right;
+        auto input_table_side = is_column_from_left_expr ? JoinTableSide::Left : JoinTableSide::Right;
         if (table_side && (*table_side) != input_table_side)
             throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
                 "JOIN {} join expression contains column from left and right table",
@@ -158,29 +203,58 @@ std::optional<JoinTableSide> extractJoinTableSideFromExpression(const ActionsDAG
     return table_side;
 }
 
-void buildJoinClause(ActionsDAGPtr join_expression_dag,
-    const std::unordered_set<const ActionsDAG::Node *> & join_expression_dag_input_nodes,
-    const ActionsDAG::Node * join_expressions_actions_node,
-    const NameSet & left_table_expression_columns_names,
-    const NameSet & right_table_expression_columns_names,
+/// Builds `expression` inside `dag` and returns the single DAG node it produced; throws otherwise.
+const ActionsDAG::Node * appendExpression(
+    ActionsDAGPtr & dag,
+    const QueryTreeNodePtr & expression,
+    const PlannerContextPtr & planner_context,
+    const JoinNode & join_node)
+{
+    PlannerActionsVisitor visitor(planner_context);
+    const auto dag_nodes = visitor.visit(dag, expression);
+    if (dag_nodes.size() == 1)
+        return dag_nodes[0];
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR,
+        "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage());
+}
+
+void buildJoinClause(
+    ActionsDAGPtr & left_dag,
+    ActionsDAGPtr & right_dag,
+    const PlannerContextPtr & planner_context,
+    //ActionsDAGPtr join_expression_dag,
+    //const std::unordered_set<const ActionsDAG::Node *> & join_expression_dag_input_nodes,
+    //const ActionsDAG::Node * join_expressions_actions_node,
+    const QueryTreeNodePtr & join_expression,
+    // const NameSet & left_table_expression_columns_names,
+    // const NameSet & right_table_expression_columns_names,
     const JoinNode & join_node,
     JoinClause & join_clause)
 {
     std::string function_name;
 
-    if (join_expressions_actions_node->function)
-        function_name = join_expressions_actions_node->function->getName();
+    //std::cerr << join_expression_dag->dumpDAG() << std::endl;
+    auto * function_node = join_expression->as<FunctionNode>();
+    if (function_node)
+        function_name = function_node->getFunction()->getName();
+
+    // if (join_expressions_actions_node->function)
+    //     function_name = join_expressions_actions_node->function->getName();
 
     /// For 'and' function go into children
     if (function_name == "and")
     {
-        for (const auto & child : join_expressions_actions_node->children)
+        for (const auto & child : function_node->getArguments())
         {
-            buildJoinClause(join_expression_dag,
-                join_expression_dag_input_nodes,
+            buildJoinClause(//join_expression_dag,
+                //join_expression_dag_input_nodes,
+                left_dag,
+                right_dag,
+                planner_context,
                 child,
-                left_table_expression_columns_names,
-                right_table_expression_columns_names,
+                // left_table_expression_columns_names,
+                // right_table_expression_columns_names,
                 join_node,
                 join_clause);
         }
@@ -193,45 +267,47 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag,
 
     if (function_name == "equals" || function_name == "isNotDistinctFrom" || is_asof_join_inequality)
     {
-        const auto * left_child = join_expressions_actions_node->children.at(0);
-        const auto * right_child = join_expressions_actions_node->children.at(1);
+        const auto left_child = function_node->getArguments().getNodes().at(0);//join_expressions_actions_node->children.at(0);
+        const auto right_child = function_node->getArguments().getNodes().at(1); //join_expressions_actions_node->children.at(1);
 
-        auto left_expression_side_optional = extractJoinTableSideFromExpression(left_child,
-            join_expression_dag_input_nodes,
-            left_table_expression_columns_names,
-            right_table_expression_columns_names,
+        auto left_expression_side_optional = extractJoinTableSideFromExpression(left_child.get(),
+            //join_expression_dag_input_nodes,
+            // left_table_expression_columns_names,
+            // right_table_expression_columns_names,
             join_node);
 
-        auto right_expression_side_optional = extractJoinTableSideFromExpression(right_child,
-            join_expression_dag_input_nodes,
-            left_table_expression_columns_names,
-            right_table_expression_columns_names,
+        auto right_expression_side_optional = extractJoinTableSideFromExpression(right_child.get(),
+            //join_expression_dag_input_nodes,
+            // left_table_expression_columns_names,
+            // right_table_expression_columns_names,
             join_node);
 
         if (!left_expression_side_optional && !right_expression_side_optional)
         {
             throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
-                "JOIN {} ON expression {} with constants is not supported",
-                join_node.formatASTForErrorMessage(),
-                join_expressions_actions_node->result_name);
+                "JOIN {} ON expression with constants is not supported",
+                join_node.formatASTForErrorMessage());
         }
         else if (left_expression_side_optional && !right_expression_side_optional)
         {
-            join_clause.addCondition(*left_expression_side_optional, join_expressions_actions_node);
+            const auto * node = appendExpression(left_dag, join_expression, planner_context, join_node);
+            join_clause.addCondition(*left_expression_side_optional, node);
         }
         else if (!left_expression_side_optional && right_expression_side_optional)
         {
-            join_clause.addCondition(*right_expression_side_optional, join_expressions_actions_node);
+            const auto * node = appendExpression(right_dag, join_expression, planner_context, join_node);
+            join_clause.addCondition(*right_expression_side_optional, node);
         }
         else
         {
+            // std::cerr << "===============\n";
             auto left_expression_side = *left_expression_side_optional;
             auto right_expression_side = *right_expression_side_optional;
 
             if (left_expression_side != right_expression_side)
             {
-                const ActionsDAG::Node * left_key = left_child;
-                const ActionsDAG::Node * right_key = right_child;
+                auto left_key = left_child;
+                auto right_key = right_child;
 
                 if (left_expression_side == JoinTableSide::Right)
                 {
@@ -240,6 +316,9 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag,
                     asof_inequality = reverseASOFJoinInequality(asof_inequality);
                 }
 
+                const auto * left_node = appendExpression(left_dag, left_key, planner_context, join_node);
+                const auto * right_node = appendExpression(right_dag, right_key, planner_context, join_node);
+
                 if (is_asof_join_inequality)
                 {
                     if (join_clause.hasASOF())
@@ -249,55 +328,63 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag,
                             join_node.formatASTForErrorMessage());
                     }
 
-                    join_clause.addASOFKey(left_key, right_key, asof_inequality);
+                    join_clause.addASOFKey(left_node, right_node, asof_inequality);
                 }
                 else
                 {
                     bool null_safe_comparison = function_name == "isNotDistinctFrom";
-                    join_clause.addKey(left_key, right_key, null_safe_comparison);
+                    join_clause.addKey(left_node, right_node, null_safe_comparison);
                 }
             }
             else
             {
-                join_clause.addCondition(left_expression_side, join_expressions_actions_node);
+                auto & dag = left_expression_side == JoinTableSide::Left ? left_dag : right_dag;
+                const auto * node = appendExpression(dag, join_expression, planner_context, join_node);
+                join_clause.addCondition(left_expression_side, node);
             }
         }
 
         return;
     }
 
-    auto expression_side_optional = extractJoinTableSideFromExpression(join_expressions_actions_node,
-        join_expression_dag_input_nodes,
-        left_table_expression_columns_names,
-        right_table_expression_columns_names,
+    auto expression_side_optional = extractJoinTableSideFromExpression(//join_expressions_actions_node,
+        //join_expression_dag_input_nodes,
+        join_expression.get(),
+        // left_table_expression_columns_names,
+        // right_table_expression_columns_names,
         join_node);
 
     if (!expression_side_optional)
         expression_side_optional = JoinTableSide::Right;
 
     auto expression_side = *expression_side_optional;
-    join_clause.addCondition(expression_side, join_expressions_actions_node);
+    auto & dag = expression_side == JoinTableSide::Left ? left_dag : right_dag;
+    const auto * node = appendExpression(dag, join_expression, planner_context, join_node);
+    join_clause.addCondition(expression_side, node);
 }
 
-JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName & join_expression_input_columns,
+JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName & join_expression_input_columns,
     const ColumnsWithTypeAndName & left_table_expression_columns,
     const ColumnsWithTypeAndName & right_table_expression_columns,
     const JoinNode & join_node,
     const PlannerContextPtr & planner_context)
 {
-    ActionsDAGPtr join_expression_actions = std::make_shared<ActionsDAG>(join_expression_input_columns);
+    /// Each side of the JOIN gets its own actions DAG, seeded with that side's input columns,
+    /// so expressions over the right table resolve against the right table's header.
+    ActionsDAGPtr left_join_actions = std::make_shared<ActionsDAG>(left_table_expression_columns);
+    ActionsDAGPtr right_join_actions = std::make_shared<ActionsDAG>(right_table_expression_columns);
 
     /** In ActionsDAG if input node has constant representation additional constant column is added.
       * That way we cannot simply check that node has INPUT type during resolution of expression join table side.
       * Put all nodes after actions dag initialization in set.
       * To check if actions dag node is input column, we check if set contains it.
       */
-    const auto & join_expression_actions_nodes = join_expression_actions->getNodes();
+    // const auto & join_expression_actions_nodes = join_expression_actions->getNodes();
 
-    std::unordered_set<const ActionsDAG::Node *> join_expression_dag_input_nodes;
-    join_expression_dag_input_nodes.reserve(join_expression_actions_nodes.size());
-    for (const auto & node : join_expression_actions_nodes)
-        join_expression_dag_input_nodes.insert(&node);
+    // std::unordered_set<const ActionsDAG::Node *> join_expression_dag_input_nodes;
+    // join_expression_dag_input_nodes.reserve(join_expression_actions_nodes.size());
+    // for (const auto & node : join_expression_actions_nodes)
+    //     join_expression_dag_input_nodes.insert(&node);
 
     /** It is possible to have constant value in JOIN ON section, that we need to ignore during DAG construction.
       * If we do not ignore it, this function will be replaced by underlying constant.
@@ -307,6 +394,9 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
       * ON (t1.id = t2.id) AND 1 != 1 AND (t1.value >= t1.value);
       */
     auto join_expression = join_node.getJoinExpression();
+    // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions expr {} ", join_expression->formatConvertedASTForErrorMessage());
+    // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions expr {} ", join_expression->dumpTree());
+
     auto * constant_join_expression = join_expression->as<ConstantNode>();
 
     if (constant_join_expression && constant_join_expression->hasSourceExpression())
@@ -318,18 +408,18 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
             "JOIN {} join expression expected function",
             join_node.formatASTForErrorMessage());
 
-    PlannerActionsVisitor join_expression_visitor(planner_context);
-    auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(join_expression_actions, join_expression);
-    if (join_expression_dag_node_raw_pointers.size() != 1)
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
-            "JOIN {} ON clause contains multiple expressions",
-            join_node.formatASTForErrorMessage());
+    // PlannerActionsVisitor join_expression_visitor(planner_context);
+    // auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(join_expression_actions, join_expression);
+    // if (join_expression_dag_node_raw_pointers.size() != 1)
+    //     throw Exception(ErrorCodes::LOGICAL_ERROR,
+    //         "JOIN {} ON clause contains multiple expressions",
+    //         join_node.formatASTForErrorMessage());
 
-    const auto * join_expressions_actions_root_node = join_expression_dag_node_raw_pointers[0];
-    if (!join_expressions_actions_root_node->function)
-        throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
-            "JOIN {} join expression expected function",
-            join_node.formatASTForErrorMessage());
+    // const auto * join_expressions_actions_root_node = join_expression_dag_node_raw_pointers[0];
+    // if (!join_expressions_actions_root_node->function)
+    //     throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
+    //         "JOIN {} join expression expected function",
+    //         join_node.formatASTForErrorMessage());
 
     size_t left_table_expression_columns_size = left_table_expression_columns.size();
 
@@ -360,20 +450,23 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
     }
 
     JoinClausesAndActions result;
-    result.join_expression_actions = join_expression_actions;
+    //result.join_expression_actions = join_expression_actions;
 
-    const auto & function_name = join_expressions_actions_root_node->function->getName();
+    const auto & function_name = function_node->getFunction()->getName();
     if (function_name == "or")
     {
-        for (const auto & child : join_expressions_actions_root_node->children)
+        for (const auto & child : function_node->getArguments())
         {
             result.join_clauses.emplace_back();
 
-            buildJoinClause(join_expression_actions,
-                join_expression_dag_input_nodes,
+            buildJoinClause(//join_expression_actions,
+                //join_expression_dag_input_nodes,
+                left_join_actions,
+                right_join_actions,
+                planner_context,
                 child,
-                join_left_actions_names_set,
-                join_right_actions_names_set,
+                // join_left_actions_names_set,
+                // join_right_actions_names_set,
                 join_node,
                 result.join_clauses.back());
         }
@@ -382,11 +475,15 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
     {
         result.join_clauses.emplace_back();
 
-        buildJoinClause(join_expression_actions,
-                join_expression_dag_input_nodes,
-                join_expressions_actions_root_node,
-                join_left_actions_names_set,
-                join_right_actions_names_set,
+        buildJoinClause(
+                left_join_actions,
+                right_join_actions,
+                planner_context,
+                //join_expression_actions,
+                //join_expression_dag_input_nodes,
+                join_expression, //join_expressions_actions_root_node,
+                // join_left_actions_names_set,
+                // join_right_actions_names_set,
                 join_node,
                 result.join_clauses.back());
     }
@@ -411,12 +508,12 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
             const ActionsDAG::Node * dag_filter_condition_node = nullptr;
 
             if (left_filter_condition_nodes.size() > 1)
-                dag_filter_condition_node = &join_expression_actions->addFunction(and_function, left_filter_condition_nodes, {});
+                dag_filter_condition_node = &left_join_actions->addFunction(and_function, left_filter_condition_nodes, {});
             else
                 dag_filter_condition_node = left_filter_condition_nodes[0];
 
             join_clause.getLeftFilterConditionNodes() = {dag_filter_condition_node};
-            join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node);
+            left_join_actions->addOrReplaceInOutputs(*dag_filter_condition_node);
 
             add_necessary_name_if_needed(JoinTableSide::Left, dag_filter_condition_node->result_name);
         }
@@ -427,12 +524,12 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
             const ActionsDAG::Node * dag_filter_condition_node = nullptr;
 
             if (right_filter_condition_nodes.size() > 1)
-                dag_filter_condition_node = &join_expression_actions->addFunction(and_function, right_filter_condition_nodes, {});
+                dag_filter_condition_node = &right_join_actions->addFunction(and_function, right_filter_condition_nodes, {});
             else
                 dag_filter_condition_node = right_filter_condition_nodes[0];
 
             join_clause.getRightFilterConditionNodes() = {dag_filter_condition_node};
-            join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node);
+            right_join_actions->addOrReplaceInOutputs(*dag_filter_condition_node);
 
             add_necessary_name_if_needed(JoinTableSide::Right, dag_filter_condition_node->result_name);
         }
@@ -469,10 +566,10 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
                 }
 
                 if (!left_key_node->result_type->equals(*common_type))
-                    left_key_node = &join_expression_actions->addCast(*left_key_node, common_type, {});
+                    left_key_node = &left_join_actions->addCast(*left_key_node, common_type, {});
 
                 if (!right_key_node->result_type->equals(*common_type))
-                    right_key_node = &join_expression_actions->addCast(*right_key_node, common_type, {});
+                    right_key_node = &right_join_actions->addCast(*right_key_node, common_type, {});
             }
 
             if (join_clause.isNullsafeCompareKey(i) && left_key_node->result_type->isNullable() && right_key_node->result_type->isNullable())
@@ -489,22 +586,22 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
                   *   SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b)
                   */
                 auto wrap_nullsafe_function = FunctionFactory::instance().get("tuple", planner_context->getQueryContext());
-                left_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {left_key_node}, {});
-                right_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {right_key_node}, {});
+                left_key_node = &left_join_actions->addFunction(wrap_nullsafe_function, {left_key_node}, {});
+                right_key_node = &right_join_actions->addFunction(wrap_nullsafe_function, {right_key_node}, {});
             }
 
-            join_expression_actions->addOrReplaceInOutputs(*left_key_node);
-            join_expression_actions->addOrReplaceInOutputs(*right_key_node);
+            left_join_actions->addOrReplaceInOutputs(*left_key_node);
+            right_join_actions->addOrReplaceInOutputs(*right_key_node);
 
             add_necessary_name_if_needed(JoinTableSide::Left, left_key_node->result_name);
             add_necessary_name_if_needed(JoinTableSide::Right, right_key_node->result_name);
         }
     }
 
-    result.left_join_expressions_actions = join_expression_actions->clone();
+    result.left_join_expressions_actions = std::move(left_join_actions);
     result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names);
 
-    result.right_join_expressions_actions = join_expression_actions->clone();
+    result.right_join_expressions_actions = std::move(right_join_actions);
     result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names);
 
     return result;
@@ -524,10 +621,10 @@ JoinClausesAndActions buildJoinClausesAndActions(
             "JOIN {} join does not have ON section",
             join_node_typed.formatASTForErrorMessage());
 
-    auto join_expression_input_columns = left_table_expression_columns;
-    join_expression_input_columns.insert(join_expression_input_columns.end(), right_table_expression_columns.begin(), right_table_expression_columns.end());
+    // auto join_expression_input_columns = left_table_expression_columns;
+    // join_expression_input_columns.insert(join_expression_input_columns.end(), right_table_expression_columns.begin(), right_table_expression_columns.end());
 
-    return buildJoinClausesAndActions(join_expression_input_columns, left_table_expression_columns, right_table_expression_columns, join_node_typed, planner_context);
+    return buildJoinClausesAndActions(/*join_expression_input_columns,*/ left_table_expression_columns, right_table_expression_columns, join_node_typed, planner_context);
 }
 
 std::optional<bool> tryExtractConstantFromJoinNode(const QueryTreeNodePtr & join_node)
diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h
index 94f32e7ad51..63fb18f6f82 100644
--- a/src/Planner/PlannerJoins.h
+++ b/src/Planner/PlannerJoins.h
@@ -165,7 +165,7 @@ struct JoinClausesAndActions
     /// Join clauses. Actions dag nodes point into join_expression_actions.
     JoinClauses join_clauses;
     /// Whole JOIN ON section expressions
-    ActionsDAGPtr join_expression_actions;
+    //ActionsDAGPtr join_expression_actions;
     /// Left join expressions actions
     ActionsDAGPtr left_join_expressions_actions;
     /// Right join expressions actions

From 7267e4a5c76e8e6ea47b088d83a7187dff621d59 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 13 Dec 2023 11:27:12 +0000
Subject: [PATCH 022/204] Check all table expressions.

---
 src/Planner/PlannerJoins.cpp                  | 65 ++++++++++++-------
 .../02514_analyzer_drop_join_on.reference     |  2 +-
 2 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index 999475586fd..6976be9c20c 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -113,11 +113,22 @@ String JoinClause::dump() const
 namespace
 {
 
+using TableExpressionSet = std::unordered_set<const IQueryTreeNode *>;
+
+TableExpressionSet extractTableExpressionsSet(const QueryTreeNodePtr & node)
+{
+    TableExpressionSet res;
+    for (const auto & expr : extractTableExpressions(node))
+        res.insert(expr.get());
+
+    return res;
+}
+
 std::optional<JoinTableSide> extractJoinTableSideFromExpression(//const ActionsDAG::Node * expression_root_node,
     const IQueryTreeNode * expression_root_node,
     //const std::unordered_set<const ActionsDAG::Node *> & join_expression_dag_input_nodes,
-    // const NameSet & left_table_expression_columns_names,
-    // const NameSet & right_table_expression_columns_names,
+    const TableExpressionSet & left_table_expressions,
+    const TableExpressionSet & right_table_expressions,
     const JoinNode & join_node)
 {
     std::optional<JoinTableSide> table_side;
@@ -138,8 +149,8 @@ std::optional<JoinTableSide> extractJoinTableSideFromExpression(//const ActionsD
     // for (const auto & r : right_table_expression_columns_names)
     //     std::cerr << r << std::endl;
 
-    const auto * left_table_expr = join_node.getLeftTableExpression().get();
-    const auto * right_table_expr = join_node.getRightTableExpression().get();
+    // const auto * left_table_expr = join_node.getLeftTableExpression().get();
+    // const auto * right_table_expr = join_node.getRightTableExpression().get();
 
     while (!nodes_to_process.empty())
     {
@@ -180,16 +191,16 @@ std::optional<JoinTableSide> extractJoinTableSideFromExpression(//const ActionsD
         if (!column_source)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "No source for column {} in JOIN {}", input_name, join_node.formatASTForErrorMessage());
 
-        bool is_column_from_left_expr = column_source == left_table_expr;
-        bool is_column_from_right_expr = column_source == right_table_expr;
+        bool is_column_from_left_expr = left_table_expressions.contains(column_source);
+        bool is_column_from_right_expr = right_table_expressions.contains(column_source);
 
         if (!is_column_from_left_expr && !is_column_from_right_expr)
             throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
                 "JOIN {} actions has column {} that do not exist in left {} or right {} table expression columns",
                 join_node.formatASTForErrorMessage(),
-                input_name,
-                left_table_expr->formatASTForErrorMessage(),
-                right_table_expr->formatASTForErrorMessage());
+                column_source->formatASTForErrorMessage(),
+                join_node.getLeftTableExpression()->formatASTForErrorMessage(),
+                join_node.getRightTableExpression()->formatASTForErrorMessage());
 
         auto input_table_side = is_column_from_left_expr ? JoinTableSide::Left : JoinTableSide::Right;
         if (table_side && (*table_side) != input_table_side)
@@ -227,8 +238,8 @@ void buildJoinClause(
     //const std::unordered_set<const ActionsDAG::Node *> & join_expression_dag_input_nodes,
     //const ActionsDAG::Node * join_expressions_actions_node,
     const QueryTreeNodePtr & join_expression,
-    // const NameSet & left_table_expression_columns_names,
-    // const NameSet & right_table_expression_columns_names,
+    const TableExpressionSet & left_table_expressions,
+    const TableExpressionSet & right_table_expressions,
     const JoinNode & join_node,
     JoinClause & join_clause)
 {
@@ -253,8 +264,8 @@ void buildJoinClause(
                 right_dag,
                 planner_context,
                 child,
-                // left_table_expression_columns_names,
-                // right_table_expression_columns_names,
+                left_table_expressions,
+                right_table_expressions,
                 join_node,
                 join_clause);
         }
@@ -272,14 +283,14 @@ void buildJoinClause(
 
         auto left_expression_side_optional = extractJoinTableSideFromExpression(left_child.get(),
             //join_expression_dag_input_nodes,
-            // left_table_expression_columns_names,
-            // right_table_expression_columns_names,
+            left_table_expressions,
+            right_table_expressions,
             join_node);
 
         auto right_expression_side_optional = extractJoinTableSideFromExpression(right_child.get(),
             //join_expression_dag_input_nodes,
-            // left_table_expression_columns_names,
-            // right_table_expression_columns_names,
+            left_table_expressions,
+            right_table_expressions,
             join_node);
 
         if (!left_expression_side_optional && !right_expression_side_optional)
@@ -350,8 +361,8 @@ void buildJoinClause(
     auto expression_side_optional = extractJoinTableSideFromExpression(//join_expressions_actions_node,
         //join_expression_dag_input_nodes,
         join_expression.get(),
-        // left_table_expression_columns_names,
-        // right_table_expression_columns_names,
+        left_table_expressions,
+        right_table_expressions,
         join_node);
 
     if (!expression_side_optional)
@@ -372,7 +383,10 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName
     //ActionsDAGPtr join_expression_actions = std::make_shared<ActionsDAG>(join_expression_input_columns);
 
     ActionsDAGPtr left_join_actions = std::make_shared<ActionsDAG>(left_table_expression_columns);
-    ActionsDAGPtr right_join_actions = std::make_shared<ActionsDAG>(left_table_expression_columns);
+    ActionsDAGPtr right_join_actions = std::make_shared<ActionsDAG>(right_table_expression_columns);
+
+    // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions cols {} ", left_join_actions->dumpDAG());
+    // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions cols {} ", right_join_actions->dumpDAG());
 
     /** In ActionsDAG if input node has constant representation additional constant column is added.
       * That way we cannot simply check that node has INPUT type during resolution of expression join table side.
@@ -449,6 +463,9 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName
         join_right_actions_names_set.insert(right_table_expression_column.name);
     }
 
+    auto join_left_table_expressions = extractTableExpressionsSet(join_node.getLeftTableExpression());
+    auto join_right_table_expressions = extractTableExpressionsSet(join_node.getRightTableExpression());
+
     JoinClausesAndActions result;
     //result.join_expression_actions = join_expression_actions;
 
@@ -465,8 +482,8 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName
                 right_join_actions,
                 planner_context,
                 child,
-                // join_left_actions_names_set,
-                // join_right_actions_names_set,
+                join_left_table_expressions,
+                join_right_table_expressions,
                 join_node,
                 result.join_clauses.back());
         }
@@ -482,8 +499,8 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName
                 //join_expression_actions,
                 //join_expression_dag_input_nodes,
                 join_expression, //join_expressions_actions_root_node,
-                // join_left_actions_names_set,
-                // join_right_actions_names_set,
+                join_left_table_expressions,
+                join_right_table_expressions,
                 join_node,
                 result.join_clauses.back());
     }
diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
index 7e327a863cf..2c62e278050 100644
--- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
+++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
@@ -147,5 +147,5 @@ Header: bx String
                       c2 String
         Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
         Header: __table5.d1 UInt64
-          ReadFromStorage (SystemNumbers)
+          ReadFromSystemNumbers
           Header: number UInt64

From 093431be9929f11549bfcb04bb64a708808621ed Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 13 Dec 2023 14:21:42 +0000
Subject: [PATCH 023/204] Fix some tests.

---
 src/Analyzer/Utils.cpp                                        | 4 +++-
 src/Analyzer/Utils.h                                          | 2 +-
 src/Planner/PlannerJoins.cpp                                  | 2 +-
 .../0_stateless/02479_analyzer_join_with_constants.sql        | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp
index 918126e0ccc..e2262912821 100644
--- a/src/Analyzer/Utils.cpp
+++ b/src/Analyzer/Utils.cpp
@@ -326,7 +326,7 @@ void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_q
     }
 }
 
-QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node)
+QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node, bool add_array_join)
 {
     QueryTreeNodes result;
 
@@ -357,6 +357,8 @@ QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node)
             {
                 auto & array_join_node = node_to_process->as<ArrayJoinNode &>();
                 nodes_to_process.push_front(array_join_node.getTableExpression());
+                if (add_array_join)
+                    result.push_back(std::move(node_to_process));
                 break;
             }
             case QueryTreeNodeType::JOIN:
diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h
index 060dc7d8bc0..6b997d5a13e 100644
--- a/src/Analyzer/Utils.h
+++ b/src/Analyzer/Utils.h
@@ -51,7 +51,7 @@ std::optional<bool> tryExtractConstantFromConditionNode(const QueryTreeNodePtr &
 void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression, const IQueryTreeNode::ConvertToASTOptions & convert_to_ast_options);
 
 /// Extract table, table function, query, union from join tree
-QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node);
+QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node, bool add_array_join = false);
 
 /// Extract left table expression from join tree
 QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_node);
diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index 6976be9c20c..fb7a4262c5a 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -118,7 +118,7 @@ using TableExpressionSet = std::unordered_set<const IQueryTreeNode *>;
 TableExpressionSet extractTableExpressionsSet(const QueryTreeNodePtr & node)
 {
     TableExpressionSet res;
-    for (const auto & expr : extractTableExpressions(node))
+    for (const auto & expr : extractTableExpressions(node, true))
         res.insert(expr.get());
 
     return res;
diff --git a/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql b/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql
index 99f20290ff0..50248665bc9 100644
--- a/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql
+++ b/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql
@@ -24,4 +24,4 @@ SELECT * FROM (SELECT 1 AS id, 1 AS value) AS t1 ASOF LEFT JOIN (SELECT 1 AS id,
 
 SELECT '--';
 
-SELECT b.dt FROM (SELECT NULL > NULL AS pk, 1 AS dt FROM numbers(5)) AS a ASOF LEFT JOIN (SELECT NULL AS pk, 1 AS dt) AS b ON (a.pk = b.pk) AND 1 != 1 AND (a.dt >= b.dt); -- { serverError 403 }
+SELECT b.dt FROM (SELECT NULL > NULL AS pk, 1 AS dt FROM numbers(5)) AS a ASOF LEFT JOIN (SELECT NULL AS pk, 1 AS dt) AS b ON (a.pk = b.pk) AND 1 != 1 AND (a.dt >= b.dt); -- { serverError 403, NOT_FOUND_COLUMN_IN_BLOCK }

From 15f00d012bd8a311941c060233a913b82f6c4aba Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 13 Dec 2023 15:13:54 +0000
Subject: [PATCH 024/204] Fix another test.

---
 contrib/librdkafka                                              | 2 +-
 .../0_stateless/02771_parallel_replicas_analyzer.reference      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/contrib/librdkafka b/contrib/librdkafka
index 2d2aab6f5b7..6f3b483426a 160000
--- a/contrib/librdkafka
+++ b/contrib/librdkafka
@@ -1 +1 @@
-Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082
+Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f
diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
index 35573110550..3b8a394a522 100644
--- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
+++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
@@ -9,4 +9,4 @@
 7885388429666205427
 8124171311239967992
 1	1	-- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n    SETTINGS\n    allow_experimental_analyzer = 1,\n    max_parallel_replicas = 2,\n    cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n    allow_experimental_parallel_reading_from_replicas = 1;
-0	2	SELECT `join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1
+0	2	SELECT `__table1`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` AS `__table1` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1

From 7bb783b80ab6147b6a47dcff4684862bb04af3c2 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 13 Dec 2023 16:27:28 +0000
Subject: [PATCH 025/204] Restore contrib back

---
 contrib/librdkafka | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/librdkafka b/contrib/librdkafka
index 6f3b483426a..2d2aab6f5b7 160000
--- a/contrib/librdkafka
+++ b/contrib/librdkafka
@@ -1 +1 @@
-Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f
+Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082

From 87a2cbb9fbf93af10e7678c92614735fb47458cc Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 13 Dec 2023 17:53:04 +0000
Subject: [PATCH 026/204] Fix another one test.

---
 .../02675_predicate_push_down_filled_join_fix.reference         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference
index 31e2d0748e1..e6c4d5768af 100644
--- a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference
+++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference
@@ -16,7 +16,7 @@ Positions: 3 0 1
   Type: INNER
   Strictness: ALL
   Algorithm: HashJoin
-  Clauses: [(id_0) = (id)]
+  Clauses: [(__table1.id) = (id)]
     Filter (( + (JOIN actions + Change column names to column identifiers)))
     Header: __table1.id UInt64
             __table1.value String

From 8c2fa32a5a6076475036eb03801a011c01cd0bbe Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 14 Dec 2023 13:04:47 +0000
Subject: [PATCH 027/204] Keep old actions

---
 src/Planner/PlannerJoins.cpp | 4 ++--
 src/Planner/PlannerJoins.h   | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index fb7a4262c5a..830d7135f6f 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -615,10 +615,10 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName
         }
     }
 
-    result.left_join_expressions_actions = std::move(left_join_actions);
+    result.left_join_expressions_actions = left_join_actions->clone();
     result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names);
 
-    result.right_join_expressions_actions = std::move(right_join_actions);
+    result.right_join_expressions_actions = right_join_actions->clone();
     result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names);
 
     return result;
diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h
index 63fb18f6f82..7bc65cfb544 100644
--- a/src/Planner/PlannerJoins.h
+++ b/src/Planner/PlannerJoins.h
@@ -165,7 +165,8 @@ struct JoinClausesAndActions
     /// Join clauses. Actions dag nodes point into join_expression_actions.
     JoinClauses join_clauses;
     /// Whole JOIN ON section expressions
-    //ActionsDAGPtr join_expression_actions;
+    ActionsDAGPtr left_join_tmp_expression_actions;
+    ActionsDAGPtr right_join_tmp_expression_actions;
     /// Left join expressions actions
     ActionsDAGPtr left_join_expressions_actions;
     /// Right join expressions actions

From 676b913a92a637f85af6181bd4daf53d7d9d0a95 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 14 Dec 2023 13:52:45 +0000
Subject: [PATCH 028/204] Keep old actions.

---
 src/Planner/PlannerJoins.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index 830d7135f6f..d96d64fff0d 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -616,9 +616,11 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName
     }
 
     result.left_join_expressions_actions = left_join_actions->clone();
+    result.left_join_tmp_expression_actions = std::move(left_join_actions);
     result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names);
 
     result.right_join_expressions_actions = right_join_actions->clone();
+    result.right_join_tmp_expression_actions = std::move(right_join_actions);
     result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names);
 
     return result;

From 80e9d7c45f4bdef9efb5c23822bc14e992650b5c Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 14 Dec 2023 14:51:38 +0000
Subject: [PATCH 029/204] Fix another case.

---
 src/Planner/PlannerJoins.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index d96d64fff0d..91d34258d61 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -301,12 +301,14 @@ void buildJoinClause(
         }
         else if (left_expression_side_optional && !right_expression_side_optional)
         {
-            const auto * node = appendExpression(left_dag, join_expression, planner_context, join_node);
+            auto & dag = *left_expression_side_optional == JoinTableSide::Left ? left_dag : right_dag;
+            const auto * node = appendExpression(dag, join_expression, planner_context, join_node);
             join_clause.addCondition(*left_expression_side_optional, node);
         }
         else if (!left_expression_side_optional && right_expression_side_optional)
         {
-            const auto * node = appendExpression(right_dag, join_expression, planner_context, join_node);
+            auto & dag = *right_expression_side_optional == JoinTableSide::Left ? left_dag : right_dag;
+            const auto * node = appendExpression(dag, join_expression, planner_context, join_node);
             join_clause.addCondition(*right_expression_side_optional, node);
         }
         else
@@ -619,6 +621,11 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName
     result.left_join_tmp_expression_actions = std::move(left_join_actions);
     result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names);
 
+    // for (const auto & name : join_right_actions_names)
+    //     std::cerr << ".. " << name << std::endl;
+
+    // std::cerr << right_join_actions->dumpDAG() << std::endl;
+
     result.right_join_expressions_actions = right_join_actions->clone();
     result.right_join_tmp_expression_actions = std::move(right_join_actions);
     result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names);

From 18e29bc6a20b9fc63160385f8d967202c5b44eda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 10:46:30 +0800
Subject: [PATCH 030/204] BloomFilter support match function

---
 src/Common/OptimizedRegularExpression.h       |  1 +
 .../MergeTree/MergeTreeIndexFullText.cpp      | 99 +++++++++++++++++++
 .../MergeTree/MergeTreeIndexFullText.h        |  2 +
 3 files changed, 102 insertions(+)

diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h
index 4521b81dfe2..a4418df698a 100644
--- a/src/Common/OptimizedRegularExpression.h
+++ b/src/Common/OptimizedRegularExpression.h
@@ -106,6 +106,7 @@ public:
         bool & required_substring_is_prefix,
         std::vector<std::string> & alternatives);
 
+
 private:
     bool is_trivial;
     bool required_substring_is_prefix;
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 7dbe7a0cbe4..bf954fd1e46 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -17,6 +17,7 @@
 #include <Parsers/ASTSubquery.h>
 #include <Parsers/ASTSelectQuery.h>
 #include <Core/Defines.h>
+#include <Common/OptimizedRegularExpression.h>
 
 #include <Poco/Logger.h>
 
@@ -201,6 +202,8 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
              || element.function == RPNElement::FUNCTION_IN
              || element.function == RPNElement::FUNCTION_NOT_IN
              || element.function == RPNElement::FUNCTION_MULTI_SEARCH
+             || element.function == RPNElement::FUNCTION_MATCH
+             || element.function == RPNElement::FUNCTION_MULTI_MATCH
              || element.function == RPNElement::ALWAYS_FALSE)
         {
             rpn_stack.push_back(false);
@@ -233,6 +236,7 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
 /// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnTranuleInPart
 bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
 {
+    std::cout<<"======== Flag into mayBeTrueOnGranule"<<std::endl;
     std::shared_ptr<MergeTreeIndexGranuleFullText> granule
             = std::dynamic_pointer_cast<MergeTreeIndexGranuleFullText>(idx_granule);
     if (!granule)
@@ -286,6 +290,41 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
             rpn_stack.emplace_back(
                     std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
         }
+        else if (element.function == RPNElement::FUNCTION_MATCH)
+        {
+            // If bloom filter is not null means we got required substring
+
+            if (!element.set_bloom_filters.empty())
+            {
+
+                std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+                const auto & bloom_filters = element.set_bloom_filters[0];
+
+                for (size_t row = 0; row < bloom_filters.size(); ++row)
+                    result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+                if (element.bloom_filter)
+                {
+                    //auto required = rpn_stack.back();
+                    //rpn_stack.pop_back();
+                    //auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
+                    //rpn_stack.emplace_back(required.can_be_true && alternative, true);
+                    auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
+                    rpn_stack.emplace_back(alternative, true);
+                }
+                else
+                    rpn_stack.emplace_back(
+                            std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+            }
+            //TODO: Need to check why bloom_filter is not null while set_bloom_filters is not empty
+            else if (element.bloom_filter)
+            {
+                std::cout<<"=========== Bloom Filter is not null"<<std::endl;
+                rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
+            }
+
+        }
         else if (element.function == RPNElement::FUNCTION_NOT)
         {
             rpn_stack.back() = !rpn_stack.back();
@@ -390,6 +429,8 @@ bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode &
                  function_name == "notEquals" ||
                  function_name == "has" ||
                  function_name == "mapContains" ||
+                 function_name == "match" ||
+                 function_name == "multiMatchAny" ||
                  function_name == "like" ||
                  function_name == "notLike" ||
                  function_name.starts_with("hasToken") ||
@@ -510,6 +551,64 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
         return true;
     }
+
+    if (function_name == "match")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_MATCH;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+
+        auto & string_view = const_value.get<String>();
+        String required_substring;
+        bool is_trivial;
+        bool required_substring_is_prefix;
+        std::vector<String> alternatives;
+        OptimizedRegularExpression::analyze(string_view, required_substring, is_trivial, required_substring_is_prefix, alternatives);
+        std::cout<<"========= is trivial:"<<is_trivial<<std::endl;
+        std::cout<<"========= required_substring_is_prefix:"<<required_substring_is_prefix<<std::endl;
+        std::cout<<"========= regex string is:"<<string_view<<std::endl;
+        std::cout<<"========= required sub string is:"<<required_substring<<std::endl;
+        for (const auto & alternative : alternatives)
+            std::cout<<"========= alternative string:"<<alternative<<std::endl;
+
+        if (required_substring.empty() && alternatives.empty())
+            return false;
+
+        if (!alternatives.empty())
+        {
+            std::vector<std::vector<BloomFilter>> bloom_filters;
+            bloom_filters.emplace_back();
+            for (const auto & alternative : alternatives)
+            {
+                bloom_filters.back().emplace_back(params);
+                token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
+            }
+            out.set_bloom_filters = std::move(bloom_filters);
+        }
+        else if (!required_substring.empty())
+           token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
+
+        return true;
+    }
+
+    if (function_name == "notEquals")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_NOT_EQUALS;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+        const auto & value = const_value.get<String>();
+        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
+        return true;
+    }
+    else if (function_name == "equals")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_EQUALS;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+        const auto & value = const_value.get<String>();
+        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
+        return true;
+    }
     else if (function_name == "has")
     {
         out.key_column = *key_index;
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h
index fbfa0fd27fc..85c873f42ba 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.h
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h
@@ -90,8 +90,10 @@ private:
             FUNCTION_NOT_EQUALS,
             FUNCTION_HAS,
             FUNCTION_IN,
+            FUNCTION_MATCH,
             FUNCTION_NOT_IN,
             FUNCTION_MULTI_SEARCH,
+            FUNCTION_MULTI_MATCH,
             FUNCTION_UNKNOWN, /// Can take any value.
             /// Operators of the logical expression.
             FUNCTION_NOT,

From e36cd6a06e303976b9c2705788d5070151d17233 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 10:56:52 +0800
Subject: [PATCH 031/204] BloomFilter support match function

---
 src/Storages/MergeTree/MergeTreeIndexFullText.cpp | 6 ------
 src/Storages/MergeTree/MergeTreeIndexFullText.h   | 1 -
 2 files changed, 7 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index bf954fd1e46..6e8c517e883 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -203,7 +203,6 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
              || element.function == RPNElement::FUNCTION_NOT_IN
              || element.function == RPNElement::FUNCTION_MULTI_SEARCH
              || element.function == RPNElement::FUNCTION_MATCH
-             || element.function == RPNElement::FUNCTION_MULTI_MATCH
              || element.function == RPNElement::ALWAYS_FALSE)
         {
             rpn_stack.push_back(false);
@@ -236,7 +235,6 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
 /// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnTranuleInPart
 bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
 {
-    std::cout<<"======== Flag into mayBeTrueOnGranule"<<std::endl;
     std::shared_ptr<MergeTreeIndexGranuleFullText> granule
             = std::dynamic_pointer_cast<MergeTreeIndexGranuleFullText>(idx_granule);
     if (!granule)
@@ -564,10 +562,6 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         bool required_substring_is_prefix;
         std::vector<String> alternatives;
         OptimizedRegularExpression::analyze(string_view, required_substring, is_trivial, required_substring_is_prefix, alternatives);
-        std::cout<<"========= is trivial:"<<is_trivial<<std::endl;
-        std::cout<<"========= required_substring_is_prefix:"<<required_substring_is_prefix<<std::endl;
-        std::cout<<"========= regex string is:"<<string_view<<std::endl;
-        std::cout<<"========= required sub string is:"<<required_substring<<std::endl;
         for (const auto & alternative : alternatives)
             std::cout<<"========= alternative string:"<<alternative<<std::endl;
 
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h
index 85c873f42ba..c35b2f2f3c4 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.h
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h
@@ -93,7 +93,6 @@ private:
             FUNCTION_MATCH,
             FUNCTION_NOT_IN,
             FUNCTION_MULTI_SEARCH,
-            FUNCTION_MULTI_MATCH,
             FUNCTION_UNKNOWN, /// Can take any value.
             /// Operators of the logical expression.
             FUNCTION_NOT,

From ebc570aef5dcb470343ced071d4dccdc5f38f175 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 11:31:07 +0800
Subject: [PATCH 032/204] optimize code

---
 .../MergeTree/MergeTreeIndexFullText.cpp      | 50 ++++++-------------
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 6e8c517e883..fd577ed93db 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -242,6 +242,18 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
 
     /// Check like in KeyCondition.
     std::vector<BoolMask> rpn_stack;
+    auto multi_funtion_processor = [&rpn_stack, &granule] (const RPNElement & element)
+    {
+        std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+        const auto & bloom_filters = element.set_bloom_filters[0];
+
+        for (size_t row = 0; row < bloom_filters.size(); ++row)
+            result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+        rpn_stack.emplace_back(
+                std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+    };
     for (const auto & element : rpn)
     {
         if (element.function == RPNElement::FUNCTION_UNKNOWN)
@@ -278,50 +290,19 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         }
         else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH)
         {
-            std::vector<bool> result(element.set_bloom_filters.back().size(), true);
-
-            const auto & bloom_filters = element.set_bloom_filters[0];
-
-            for (size_t row = 0; row < bloom_filters.size(); ++row)
-                result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
-
-            rpn_stack.emplace_back(
-                    std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+            multi_funtion_processor(element);
         }
         else if (element.function == RPNElement::FUNCTION_MATCH)
         {
-            // If bloom filter is not null means we got required substring
-
+            // If set_bloom_filters is not empty means we got alternative substring
             if (!element.set_bloom_filters.empty())
             {
-
-                std::vector<bool> result(element.set_bloom_filters.back().size(), true);
-
-                const auto & bloom_filters = element.set_bloom_filters[0];
-
-                for (size_t row = 0; row < bloom_filters.size(); ++row)
-                    result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
-
-                if (element.bloom_filter)
-                {
-                    //auto required = rpn_stack.back();
-                    //rpn_stack.pop_back();
-                    //auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
-                    //rpn_stack.emplace_back(required.can_be_true && alternative, true);
-                    auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
-                    rpn_stack.emplace_back(alternative, true);
-                }
-                else
-                    rpn_stack.emplace_back(
-                            std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+                multi_funtion_processor(element);
             }
-            //TODO: Need to check why bloom_filter is not null while set_bloom_filters is not empty
             else if (element.bloom_filter)
             {
-                std::cout<<"=========== Bloom Filter is not null"<<std::endl;
                 rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
             }
-
         }
         else if (element.function == RPNElement::FUNCTION_NOT)
         {
@@ -428,7 +409,6 @@ bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode &
                  function_name == "has" ||
                  function_name == "mapContains" ||
                  function_name == "match" ||
-                 function_name == "multiMatchAny" ||
                  function_name == "like" ||
                  function_name == "notLike" ||
                  function_name.starts_with("hasToken") ||

From 57a5bef09e1354baff359f16362c5251ecc4364c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 11:49:56 +0800
Subject: [PATCH 033/204] optimize code

---
 .../MergeTree/MergeTreeIndexFullText.cpp       | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index fd577ed93db..23b95ed2c7d 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -565,24 +565,6 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         return true;
     }
 
-    if (function_name == "notEquals")
-    {
-        out.key_column = *key_index;
-        out.function = RPNElement::FUNCTION_NOT_EQUALS;
-        out.bloom_filter = std::make_unique<BloomFilter>(params);
-        const auto & value = const_value.get<String>();
-        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
-        return true;
-    }
-    else if (function_name == "equals")
-    {
-        out.key_column = *key_index;
-        out.function = RPNElement::FUNCTION_EQUALS;
-        out.bloom_filter = std::make_unique<BloomFilter>(params);
-        const auto & value = const_value.get<String>();
-        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
-        return true;
-    }
     else if (function_name == "has")
     {
         out.key_column = *key_index;

From 3bd7505a836a2181bc0aa92cb344a9c34f581c07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 11:50:56 +0800
Subject: [PATCH 034/204] optimize code

---
 src/Common/OptimizedRegularExpression.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h
index a4418df698a..4521b81dfe2 100644
--- a/src/Common/OptimizedRegularExpression.h
+++ b/src/Common/OptimizedRegularExpression.h
@@ -106,7 +106,6 @@ public:
         bool & required_substring_is_prefix,
         std::vector<std::string> & alternatives);
 
-
 private:
     bool is_trivial;
     bool required_substring_is_prefix;

From 5c1ed3e2c249e1f20714dc9d642ad30b620ad938 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 15 Dec 2023 13:00:10 +0000
Subject: [PATCH 035/204] Fix additional filters.

---
 src/Analyzer/IQueryTreeNode.h                         | 11 +++++++++++
 src/Planner/PlannerJoinTree.cpp                       |  2 +-
 .../0_stateless/01881_join_on_conditions_hash.sql.j2  |  2 +-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h
index 922eaabe75c..b07aa2d31b0 100644
--- a/src/Analyzer/IQueryTreeNode.h
+++ b/src/Analyzer/IQueryTreeNode.h
@@ -143,9 +143,17 @@ public:
         return alias;
     }
 
+    const String & getOriginalAlias() const
+    {
+        return original_alias.empty() ? alias : original_alias;
+    }
+
     /// Set node alias
     void setAlias(String alias_value)
     {
+        if (original_alias.empty())
+            original_alias = std::move(alias);
+
         alias = std::move(alias_value);
     }
 
@@ -276,6 +284,9 @@ protected:
 
 private:
     String alias;
+    /// An alias from query. Alias can be replaced by query passes,
+    /// but we need to keep the original one to support additional_table_filters.
+    String original_alias;
     ASTPtr original_ast;
 };
 
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 3dd37d8a5b1..99fecacf9ce 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -812,7 +812,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
                     }
                 }
 
-                const auto & table_expression_alias = table_expression->getAlias();
+                const auto & table_expression_alias = table_expression->getOriginalAlias();
                 auto additional_filters_info = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context);
                 add_filter(additional_filters_info, "additional filter");
 
diff --git a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2
index fafefd72cb8..bd20d34b684 100644
--- a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2
+++ b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2
@@ -30,7 +30,7 @@ SELECT t1.key, t1.key2 FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key ==
 
 SELECT '--';
 SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2;
-SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 0; -- { serverError INVALID_JOIN_ON_EXPRESSION }
+SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 0; -- { serverError INVALID_JOIN_ON_EXPRESSION,NOT_FOUND_COLUMN_IN_BLOCK }
 
 SELECT '--';
 SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.id > 2;

From fd64599127f83ffeb9d83cf2657c4252540a216a Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 15 Dec 2023 15:15:07 +0000
Subject: [PATCH 036/204] Re-visit subquery for IN in
 CreateUniqueTableAliasesVisitor.

---
 src/Analyzer/createUniqueTableAliases.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index b49b433bb91..2e297d9abdf 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -1,9 +1,11 @@
 #include <memory>
 #include <unordered_map>
 #include <Analyzer/createUniqueTableAliases.h>
+#include <Analyzer/FunctionNode.h>
 #include <Analyzer/InDepthQueryTreeVisitor.h>
 #include <Analyzer/IQueryTreeNode.h>
 #include <Analyzer/LambdaNode.h>
+#include <Analyzer/Utils.h>
 
 namespace DB
 {
@@ -26,6 +28,7 @@ public:
     void enterImpl(QueryTreeNodePtr & node)
     {
         auto node_type = node->getNodeType();
+
         switch (node_type)
         {
             case QueryTreeNodeType::QUERY:
@@ -90,6 +93,24 @@ public:
             }
             scope_nodes_stack.pop_back();
         }
+
+        /// Here we revisit subquery for IN function. Reasons:
+        /// * For remote query execution, query tree may be traversed a few times.
+        ///   In such a case, it is possible to get AST like
+        ///   `IN ((SELECT ... FROM table AS __table4) AS __table1)` which result in
+        ///   `Multiple expressions for the alias` exception
+        /// * Tables in subqueries could have different aliases => different three hashes,
+        ///   which is important to be able to find a set in PreparedSets
+        /// See 01253_subquery_in_aggregate_function_JustStranger.
+        ///
+        /// So, we revisit this subquery to make aliases stable.
+        /// This should be safe cause columns from IN subquery can't be used in main query anyway.
+        if (node->getNodeType() == QueryTreeNodeType::FUNCTION)
+        {
+            auto * function_node = node->as<FunctionNode>();
+            if (isNameOfInFunction(function_node->getFunctionName()))
+                CreateUniqueTableAliasesVisitor(getContext()).visit(function_node->getArguments().getNodes().back());
+        }
     }
 
 private:

From 2f9537b7e0bff90569ecf9b6ffbde8a5a6e4f84e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 15 Dec 2023 18:12:59 +0000
Subject: [PATCH 037/204] Keep alias for cloned dummy tables.

---
 src/Planner/Utils.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp
index 9a6ef6f5d83..ba29cab5956 100644
--- a/src/Planner/Utils.cpp
+++ b/src/Planner/Utils.cpp
@@ -357,6 +357,7 @@ QueryTreeNodePtr mergeConditionNodes(const QueryTreeNodes & condition_nodes, con
 
 QueryTreeNodePtr replaceTableExpressionsWithDummyTables(const QueryTreeNodePtr & query_node,
     const ContextPtr & context,
+    //PlannerContext & planner_context,
     ResultReplacementMap * result_replacement_map)
 {
     auto & query_node_typed = query_node->as<QueryNode &>();
@@ -406,6 +407,13 @@ QueryTreeNodePtr replaceTableExpressionsWithDummyTables(const QueryTreeNodePtr &
         if (result_replacement_map)
             result_replacement_map->emplace(table_expression, dummy_table_node);
 
+        dummy_table_node->setAlias(table_expression->getAlias());
+
+        // auto & src_table_expression_data = planner_context.getOrCreateTableExpressionData(table_expression);
+        // auto & dst_table_expression_data = planner_context.getOrCreateTableExpressionData(dummy_table_node);
+
+        // dst_table_expression_data = src_table_expression_data;
+
         replacement_map.emplace(table_expression.get(), std::move(dummy_table_node));
     }
 

From 8daa8e509a57344fbaaa58e9769aa28baae3a8d9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 15 Dec 2023 21:42:20 +0000
Subject: [PATCH 038/204] Try recreate context for IN subqueries.

---
 src/Planner/Planner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp
index 12e8d795347..de40f0a7d1a 100644
--- a/src/Planner/Planner.cpp
+++ b/src/Planner/Planner.cpp
@@ -1054,7 +1054,7 @@ void addBuildSubqueriesForSetsStepIfNeeded(
         Planner subquery_planner(
             query_tree,
             subquery_options,
-            planner_context->getGlobalPlannerContext());
+            std::make_shared<GlobalPlannerContext>()); //planner_context->getGlobalPlannerContext());
         subquery_planner.buildQueryPlanIfNeeded();
 
         subquery->setQueryPlan(std::make_unique<QueryPlan>(std::move(subquery_planner).extractQueryPlan()));

From e7e5fc2891707474c1fa817c0babcc602bf96431 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Sat, 16 Dec 2023 12:01:36 +0000
Subject: [PATCH 039/204] Another attempt

---
 src/Storages/buildQueryTreeForShard.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp
index c8a4b99474f..529fb9cac4a 100644
--- a/src/Storages/buildQueryTreeForShard.cpp
+++ b/src/Storages/buildQueryTreeForShard.cpp
@@ -373,7 +373,9 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeN
 
     removeGroupingFunctionSpecializations(query_tree_to_modify);
 
-    createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext());
+    // std::cerr << "====================== build 1 \n" << query_tree_to_modify->dumpTree() << std::endl;
+    // createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext());
+    // std::cerr << "====================== build 2 \n" << query_tree_to_modify->dumpTree() << std::endl;
 
     return query_tree_to_modify;
 }

From 288888f9502c9c18ec4d418c9408a39c4bcb85fa Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Sat, 16 Dec 2023 15:41:02 +0000
Subject: [PATCH 040/204] Another try.

---
 src/Analyzer/createUniqueTableAliases.cpp | 7 ++++++-
 src/Storages/buildQueryTreeForShard.cpp   | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp
index 2e297d9abdf..8f850fe8dec 100644
--- a/src/Analyzer/createUniqueTableAliases.cpp
+++ b/src/Analyzer/createUniqueTableAliases.cpp
@@ -109,7 +109,12 @@ public:
         {
             auto * function_node = node->as<FunctionNode>();
             if (isNameOfInFunction(function_node->getFunctionName()))
-                CreateUniqueTableAliasesVisitor(getContext()).visit(function_node->getArguments().getNodes().back());
+            {
+                auto arg = function_node->getArguments().getNodes().back();
+                /// Avoid aliasing IN `table`
+                if (arg->getNodeType() != QueryTreeNodeType::TABLE)
+                    CreateUniqueTableAliasesVisitor(getContext()).visit(function_node->getArguments().getNodes().back());
+            }
         }
     }
 
diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp
index 529fb9cac4a..00cc5e3ee58 100644
--- a/src/Storages/buildQueryTreeForShard.cpp
+++ b/src/Storages/buildQueryTreeForShard.cpp
@@ -374,7 +374,7 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeN
     removeGroupingFunctionSpecializations(query_tree_to_modify);
 
     // std::cerr << "====================== build 1 \n" << query_tree_to_modify->dumpTree() << std::endl;
-    // createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext());
+    createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext());
     // std::cerr << "====================== build 2 \n" << query_tree_to_modify->dumpTree() << std::endl;
 
     return query_tree_to_modify;

From 93b18a32c3948d0d48bf724bc9745f530c83970a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Mon, 18 Dec 2023 16:02:19 +0800
Subject: [PATCH 041/204] add tests

---
 .../MergeTree/MergeTreeIndexFullText.cpp      | 13 +++--
 .../02943_tokenbf_support_match.reference     | 51 +++++++++++++++++++
 .../02943_tokenbf_support_match.sql           | 43 ++++++++++++++++
 3 files changed, 100 insertions(+), 7 deletions(-)
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.reference
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.sql

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 23b95ed2c7d..2a206f69024 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -242,6 +242,7 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
 
     /// Check like in KeyCondition.
     std::vector<BoolMask> rpn_stack;
+
     auto multi_funtion_processor = [&rpn_stack, &granule] (const RPNElement & element)
     {
         std::vector<bool> result(element.set_bloom_filters.back().size(), true);
@@ -254,6 +255,7 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         rpn_stack.emplace_back(
                 std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
     };
+
     for (const auto & element : rpn)
     {
         if (element.function == RPNElement::FUNCTION_UNKNOWN)
@@ -294,11 +296,11 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         }
         else if (element.function == RPNElement::FUNCTION_MATCH)
         {
-            // If set_bloom_filters is not empty means we got alternative substring
             if (!element.set_bloom_filters.empty())
             {
                 multi_funtion_processor(element);
             }
+            // If set_bloom_filters is not empty means we got alternative substring
             else if (element.bloom_filter)
             {
                 rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
@@ -538,12 +540,9 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
 
         auto & string_view = const_value.get<String>();
         String required_substring;
-        bool is_trivial;
-        bool required_substring_is_prefix;
         std::vector<String> alternatives;
-        OptimizedRegularExpression::analyze(string_view, required_substring, is_trivial, required_substring_is_prefix, alternatives);
-        for (const auto & alternative : alternatives)
-            std::cout<<"========= alternative string:"<<alternative<<std::endl;
+        bool tmp_var;
+        OptimizedRegularExpression::analyze(string_view, required_substring, tmp_var, tmp_var, alternatives);
 
         if (required_substring.empty() && alternatives.empty())
             return false;
@@ -559,7 +558,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
             }
             out.set_bloom_filters = std::move(bloom_filters);
         }
-        else if (!required_substring.empty())
+        else
            token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
 
         return true;
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
new file mode 100644
index 00000000000..7e36857190a
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
@@ -0,0 +1,51 @@
+============== SKIP 3 GRANUS ============
+============== Required String: Hello ============
+============== Alternative String: Hello ClickHouse ============
+============== Required String: Hello World ============
+Expression ((Projection + Before ORDER BY))
+  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 5/5
+    Skip
+      Name: str_idx
+      Description: tokenbf_v1 GRANULARITY 1
+      Parts: 1/1
+      Granules: 2/5
+
+
+============== SKIP 3 GRANUS ============
+============== No Required String ============
+============== Alternative String: ClickHouse ============
+============== Alternative String: World ============
+Expression ((Projection + Before ORDER BY))
+  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 5/5
+    Skip
+      Name: str_idx
+      Description: tokenbf_v1 GRANULARITY 1
+      Parts: 1/1
+      Granules: 2/5
+
+
+============== SKIP 4 GRANUS ============
+============== Required String: OLAP============
+============== No Alternative String============
+Expression ((Projection + Before ORDER BY))
+  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 5/5
+    Skip
+      Name: str_idx
+      Description: tokenbf_v1 GRANULARITY 1
+      Parts: 1/1
+      Granules: 1/5
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
new file mode 100644
index 00000000000..078e32ae94c
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
@@ -0,0 +1,43 @@
+DROP DATABASE IF EXISTS test_tokenbf_match;
+
+CREATE DATABASE test_tokenbf_match;
+
+CREATE TABLE test_tokenbf_match.test_tokenbf 
+(
+    `id` UInt32,
+    `str` String,
+    INDEX str_idx str TYPE tokenbf_v1(256, 2, 0) GRANULARITY 1
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 1;
+ 
+INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
+
+SELECT '============== SKIP 3 GRANUS ============';
+SELECT '============== Required String: Hello ============';
+SELECT '============== Alternative String: Hello ClickHouse ============';
+SELECT '============== Required String: Hello World ============';
+
+EXPLAIN indexes=1 SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)');
+
+SELECT '';
+SELECT '';
+
+SELECT '============== SKIP 3 GRANUS ============';
+SELECT '============== No Required String ============';
+SELECT '============== Alternative String: ClickHouse ============';
+SELECT '============== Alternative String: World ============';
+
+EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)');
+
+SELECT '';
+SELECT '';
+
+SELECT '============== SKIP 4 GRANUS ============';
+SELECT '============== Required String: OLAP============';
+SELECT '============== No Alternative String============';
+
+EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*');
+
+DROP DATABASE IF EXISTS test_tokenbf_match;

From 761554e86d04ca3d7984037cfba72a34e00b1498 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Mon, 18 Dec 2023 18:08:41 +0800
Subject: [PATCH 042/204] fix test

---
 .../02943_tokenbf_support_match.reference     | 28 ++++++++++-------
 .../02943_tokenbf_support_match.sql           | 30 ++++++++++++-------
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
index 7e36857190a..241346c13d7 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
@@ -1,7 +1,9 @@
-============== SKIP 3 GRANUS ============
-============== Required String: Hello ============
-============== Alternative String: Hello ClickHouse ============
-============== Required String: Hello World ============
+========================================
+| SKIP 3 GRANUS                        |
+| Required String: Hello               |
+| Alternative String: Hello ClickHouse |
+| Alternative String: Hello World      |
+========================================
 Expression ((Projection + Before ORDER BY))
   ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
   Indexes:
@@ -16,10 +18,12 @@ Expression ((Projection + Before ORDER BY))
       Granules: 2/5
 
 
-============== SKIP 3 GRANUS ============
-============== No Required String ============
-============== Alternative String: ClickHouse ============
-============== Alternative String: World ============
+========================================
+| SKIP 3 GRANUS                        |
+| No Required String                   |
+| Alternative String: ClickHouse       |
+| Alternative String: World            |
+========================================
 Expression ((Projection + Before ORDER BY))
   ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
   Indexes:
@@ -34,9 +38,11 @@ Expression ((Projection + Before ORDER BY))
       Granules: 2/5
 
 
-============== SKIP 4 GRANUS ============
-============== Required String: OLAP============
-============== No Alternative String============
+========================================
+| SKIP 4 GRANUS                        |
+| Required String: OLAP                |
+| No Alternative String                |
+========================================
 Expression ((Projection + Before ORDER BY))
   ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
   Indexes:
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
index 078e32ae94c..2a98151624c 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
@@ -1,3 +1,5 @@
+-- Tags: no-parallel
+
 DROP DATABASE IF EXISTS test_tokenbf_match;
 
 CREATE DATABASE test_tokenbf_match;
@@ -14,29 +16,35 @@ SETTINGS index_granularity = 1;
  
 INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
 
-SELECT '============== SKIP 3 GRANUS ============';
-SELECT '============== Required String: Hello ============';
-SELECT '============== Alternative String: Hello ClickHouse ============';
-SELECT '============== Required String: Hello World ============';
+SELECT '========================================';
+SELECT '| SKIP 3 GRANUS                        |';
+SELECT '| Required String: Hello               |';
+SELECT '| Alternative String: Hello ClickHouse |';
+SELECT '| Alternative String: Hello World      |';
+SELECT '========================================';
 
 EXPLAIN indexes=1 SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)');
 
 SELECT '';
 SELECT '';
 
-SELECT '============== SKIP 3 GRANUS ============';
-SELECT '============== No Required String ============';
-SELECT '============== Alternative String: ClickHouse ============';
-SELECT '============== Alternative String: World ============';
+SELECT '========================================';
+SELECT '| SKIP 3 GRANUS                        |';
+SELECT '| No Required String                   |';
+SELECT '| Alternative String: ClickHouse       |';
+SELECT '| Alternative String: World            |';
+SELECT '========================================';
 
 EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)');
 
 SELECT '';
 SELECT '';
 
-SELECT '============== SKIP 4 GRANUS ============';
-SELECT '============== Required String: OLAP============';
-SELECT '============== No Alternative String============';
+SELECT '========================================';
+SELECT '| SKIP 4 GRANUS                        |';
+SELECT '| Required String: OLAP                |';
+SELECT '| No Alternative String                |';
+SELECT '========================================';
 
 EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*');
 

From 9b628aa9d91ed073be4bd9572d75e9cccdc03478 Mon Sep 17 00:00:00 2001
From: skyoct <skyoct@163.com>
Date: Mon, 18 Dec 2023 13:25:43 +0000
Subject: [PATCH 043/204] feat: add server setting runtime reload

---
 .../System/StorageSystemServerSettings.cpp    | 79 +++++++++++++++----
 1 file changed, 64 insertions(+), 15 deletions(-)

diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp
index 3a3acabc5a3..ded5d8e8fae 100644
--- a/src/Storages/System/StorageSystemServerSettings.cpp
+++ b/src/Storages/System/StorageSystemServerSettings.cpp
@@ -1,6 +1,7 @@
 #include <Storages/System/StorageSystemServerSettings.h>
 #include <Core/BackgroundSchedulePool.h>
 #include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeEnum.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/ProcessList.h>
@@ -18,6 +19,30 @@ namespace CurrentMetrics
 
 namespace DB
 {
+
+enum class RuntimeReloadType
+{
+    FULL,
+    ONLY_INCREASE,
+    NO,
+};
+
+static std::vector<std::pair<String, Int8>> getTypeEnumsAndValues()
+{
+    return std::vector<std::pair<String, Int8>>{
+        {"Full",            static_cast<Int8>(RuntimeReloadType::FULL)},
+        {"OnlyIncrease",    static_cast<Int8>(RuntimeReloadType::ONLY_INCREASE)},
+        {"No",              static_cast<Int8>(RuntimeReloadType::NO)},
+    };
+}
+
+struct UpdatedData {
+    std::string value;
+    RuntimeReloadType type;
+};
+
+
+
 NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
 {
     return {
@@ -28,30 +53,53 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
         {"description", std::make_shared<DataTypeString>()},
         {"type", std::make_shared<DataTypeString>()},
         {"is_obsolete", std::make_shared<DataTypeUInt8>()},
-        {"is_hot_reloadable", std::make_shared<DataTypeUInt8>()}
+        {"is_hot_reloadable", std::make_shared<DataTypeUInt8>()},
+        {"runtime_reload", std::make_shared<DataTypeEnum8>(getTypeEnumsAndValues())}
     };
 }
 
 void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const
 {
     // Server settings that have been reloaded from the config file.
-    std::unordered_map<std::string, std::string> updated = {
-        {"max_server_memory_usage", std::to_string(total_memory_tracker.getHardLimit())},
-        {"allow_use_jemalloc_memory", std::to_string(total_memory_tracker.getAllowUseJemallocMmemory())},
+    // std::unordered_map<std::string, std::string> updated = {
+    //     {"max_server_memory_usage", std::to_string(total_memory_tracker.getHardLimit())},
+    //     {"allow_use_jemalloc_memory", std::to_string(total_memory_tracker.getAllowUseJemallocMmemory())},
 
-        {"max_table_size_to_drop", std::to_string(context->getMaxTableSizeToDrop())},
-        {"max_partition_size_to_drop", std::to_string(context->getMaxPartitionSizeToDrop())},
+    //     {"max_table_size_to_drop", std::to_string(context->getMaxTableSizeToDrop())},
+    //     {"max_partition_size_to_drop", std::to_string(context->getMaxPartitionSizeToDrop())},
 
-        {"max_concurrent_queries", std::to_string(context->getProcessList().getMaxSize())},
-        {"max_concurrent_insert_queries", std::to_string(context->getProcessList().getMaxInsertQueriesAmount())},
-        {"max_concurrent_select_queries", std::to_string(context->getProcessList().getMaxSelectQueriesAmount())},
+    //     {"max_concurrent_queries", std::to_string(context->getProcessList().getMaxSize())},
+    //     {"max_concurrent_insert_queries", std::to_string(context->getProcessList().getMaxInsertQueriesAmount())},
+    //     {"max_concurrent_select_queries", std::to_string(context->getProcessList().getMaxSelectQueriesAmount())},
 
-        {"background_pool_size", std::to_string(context->getMergeMutateExecutor()->getMaxThreads())},
-        {"background_move_pool_size", std::to_string(context->getMovesExecutor()->getMaxThreads())},
-        {"background_fetches_pool_size", std::to_string(context->getFetchesExecutor()->getMaxThreads())},
-        {"background_common_pool_size", std::to_string(context->getCommonExecutor()->getMaxThreads())},
+    //     {"background_pool_size", std::to_string(context->getMergeMutateExecutor()->getMaxThreads())},
+    //     {"background_move_pool_size", std::to_string(context->getMovesExecutor()->getMaxThreads())},
+    //     {"background_fetches_pool_size", std::to_string(context->getFetchesExecutor()->getMaxThreads())},
+    //     {"background_common_pool_size", std::to_string(context->getCommonExecutor()->getMaxThreads())},
 
-        {"background_buffer_flush_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize))},
+    //     {"background_buffer_flush_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize))},
+    //     {"background_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize))},
+    //     {"background_message_broker_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize))},
+    //     {"background_distributed_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundDistributedSchedulePoolSize))}
+    // };
+
+    std::unordered_map<std::string, UpdatedData> updated = {
+        {"max_server_memory_usage", {std::to_string(total_memory_tracker.getHardLimit()), RuntimeReloadType::FULL}},
+        {"allow_use_jemalloc_memory", {std::to_string(total_memory_tracker.getAllowUseJemallocMmemory()), RuntimeReloadType::FULL}},
+
+        {"max_table_size_to_drop", {std::to_string(context->getMaxTableSizeToDrop()), RuntimeReloadType::FULL}},
+        {"max_partition_size_to_drop", {std::to_string(context->getMaxPartitionSizeToDrop()), RuntimeReloadType::FULL}},
+
+        {"max_concurrent_queries", {std::to_string(context->getProcessList().getMaxSize()), RuntimeReloadType::FULL}},
+        {"max_concurrent_insert_queries", {std::to_string(context->getProcessList().getMaxInsertQueriesAmount()), RuntimeReloadType::FULL}},
+        {"max_concurrent_select_queries", {std::to_string(context->getProcessList().getMaxSelectQueriesAmount()), RuntimeReloadType::FULL}},
+
+        {"background_pool_size", {std::to_string(context->getMergeMutateExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
+        {"background_move_pool_size", {std::to_string(context->getMovesExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
+        {"background_fetches_pool_size", {std::to_string(context->getFetchesExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
+        {"background_common_pool_size", {std::to_string(context->getCommonExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
+
+        {"background_buffer_flush_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize)), RuntimeReloadType::ONLY_INCREASE}},
         {"background_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize))},
         {"background_message_broker_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize))},
         {"background_distributed_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundDistributedSchedulePoolSize))}
@@ -67,13 +115,14 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
         const auto & it = updated.find(setting_name);
 
         res_columns[0]->insert(setting_name);
-        res_columns[1]->insert((it != updated.end()) ? it->second : setting.getValueString());
+        res_columns[1]->insert((it != updated.end()) ? it->second.value: setting.getValueString());
         res_columns[2]->insert(setting.getDefaultValueString());
         res_columns[3]->insert(setting.isValueChanged());
         res_columns[4]->insert(setting.getDescription());
         res_columns[5]->insert(setting.getTypeName());
         res_columns[6]->insert(setting.isObsolete());
         res_columns[7]->insert((it != updated.end()) ? true : false);
+        res_columns[8]->insert((it != updated.end()) ? static_cast<Int8>(it->second.type): static_cast<Int8>(RuntimeReloadType::NO));
     }
 }
 

From 83d4b729615bc09b5019c164790353e9425891eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 19 Dec 2023 15:01:21 +0800
Subject: [PATCH 044/204] fix test

---
 .../02943_tokenbf_support_match.reference     | 47 ---------------
 .../02943_tokenbf_support_match.sql           | 59 ++++++++++++-------
 2 files changed, 38 insertions(+), 68 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
index 241346c13d7..d02011eb2a1 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
@@ -1,57 +1,10 @@
-========================================
-| SKIP 3 GRANUS                        |
-| Required String: Hello               |
-| Alternative String: Hello ClickHouse |
-| Alternative String: Hello World      |
-========================================
-Expression ((Projection + Before ORDER BY))
-  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
-  Indexes:
-    PrimaryKey
-      Condition: true
-      Parts: 1/1
       Granules: 5/5
-    Skip
-      Name: str_idx
-      Description: tokenbf_v1 GRANULARITY 1
-      Parts: 1/1
       Granules: 2/5
 
 
-========================================
-| SKIP 3 GRANUS                        |
-| No Required String                   |
-| Alternative String: ClickHouse       |
-| Alternative String: World            |
-========================================
-Expression ((Projection + Before ORDER BY))
-  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
-  Indexes:
-    PrimaryKey
-      Condition: true
-      Parts: 1/1
       Granules: 5/5
-    Skip
-      Name: str_idx
-      Description: tokenbf_v1 GRANULARITY 1
-      Parts: 1/1
       Granules: 2/5
 
 
-========================================
-| SKIP 4 GRANUS                        |
-| Required String: OLAP                |
-| No Alternative String                |
-========================================
-Expression ((Projection + Before ORDER BY))
-  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
-  Indexes:
-    PrimaryKey
-      Condition: true
-      Parts: 1/1
       Granules: 5/5
-    Skip
-      Name: str_idx
-      Description: tokenbf_v1 GRANULARITY 1
-      Parts: 1/1
       Granules: 1/5
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
index 2a98151624c..b48eb45c0d0 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
@@ -16,36 +16,53 @@ SETTINGS index_granularity = 1;
  
 INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
 
-SELECT '========================================';
-SELECT '| SKIP 3 GRANUS                        |';
-SELECT '| Required String: Hello               |';
-SELECT '| Alternative String: Hello ClickHouse |';
-SELECT '| Alternative String: Hello World      |';
-SELECT '========================================';
+--SKIP 3 GRANUS
+--Required String: Hello
+--Alternative String: Hello ClickHouse
+--Alternative String: Hello World
+SELECT 
+  *
+FROM
+(
+    EXPLAIN indexes=1
+    SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)')
+)
+WHERE
+  explain like '%Granules%';
 
-EXPLAIN indexes=1 SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)');
 
 SELECT '';
 SELECT '';
 
-SELECT '========================================';
-SELECT '| SKIP 3 GRANUS                        |';
-SELECT '| No Required String                   |';
-SELECT '| Alternative String: ClickHouse       |';
-SELECT '| Alternative String: World            |';
-SELECT '========================================';
 
-EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)');
+--SKIP 3 GRANUS
+--No Required String
+--Alternative String: ClickHouse
+--Alternative String: World
+SELECT
+  *
+FROM
+(
+    EXPLAIN indexes = 1
+    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)')
+)
+WHERE
+  explain like '%Granules%';
 
 SELECT '';
 SELECT '';
 
-SELECT '========================================';
-SELECT '| SKIP 4 GRANUS                        |';
-SELECT '| Required String: OLAP                |';
-SELECT '| No Alternative String                |';
-SELECT '========================================';
-
-EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*');
+--SKIP 4 GRANUS
+--Required String: OLAP
+--No Alternative String
+SELECT
+  *
+FROM
+(
+    EXPLAIN indexes = 1
+    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*')
+)
+WHERE
+  explain like '%Granules%';
 
 DROP DATABASE IF EXISTS test_tokenbf_match;

From 6df2548417c46023aff87339f53691501380b48a Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 19 Dec 2023 09:11:18 +0000
Subject: [PATCH 045/204] Some minor adjustments

---
 .../MergeTree/MergeTreeIndexFullText.cpp      | 116 +++++++++---------
 ...f_indexes_support_match_function.reference |  26 ++++
 ...ngrambf_indexes_support_match_function.sql | 107 ++++++++++++++++
 .../02943_tokenbf_support_match.reference     |  10 --
 .../02943_tokenbf_support_match.sql           |  68 ----------
 5 files changed, 192 insertions(+), 135 deletions(-)
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
 delete mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.reference
 delete mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.sql

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 85343aabd50..3dbc4e8a7f1 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -1,23 +1,23 @@
 #include <Storages/MergeTree/MergeTreeIndexFullText.h>
 
 #include <Columns/ColumnArray.h>
-#include <DataTypes/DataTypesNumber.h>
+#include <Common/OptimizedRegularExpression.h>
+#include <Core/Defines.h>
 #include <DataTypes/DataTypeArray.h>
-#include <IO/WriteHelpers.h>
+#include <DataTypes/DataTypesNumber.h>
 #include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/TreeRewriter.h>
 #include <Interpreters/misc.h>
-#include <Storages/MergeTree/MergeTreeData.h>
-#include <Storages/MergeTree/RPNBuilder.h>
-#include <Storages/MergeTree/MergeTreeIndexUtils.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTSubquery.h>
 #include <Parsers/ASTSelectQuery.h>
-#include <Core/Defines.h>
-#include <Common/OptimizedRegularExpression.h>
+#include <Parsers/ASTSubquery.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <Storages/MergeTree/MergeTreeIndexUtils.h>
+#include <Storages/MergeTree/RPNBuilder.h>
 
 #include <Poco/Logger.h>
 
@@ -243,20 +243,6 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
 
     /// Check like in KeyCondition.
     std::vector<BoolMask> rpn_stack;
-
-    auto multi_funtion_processor = [&rpn_stack, &granule] (const RPNElement & element)
-    {
-        std::vector<bool> result(element.set_bloom_filters.back().size(), true);
-
-        const auto & bloom_filters = element.set_bloom_filters[0];
-
-        for (size_t row = 0; row < bloom_filters.size(); ++row)
-            result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
-
-        rpn_stack.emplace_back(
-                std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
-    };
-
     for (const auto & element : rpn)
     {
         if (element.function == RPNElement::FUNCTION_UNKNOWN)
@@ -294,17 +280,32 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH
             || element.function == RPNElement::FUNCTION_HAS_ANY)
         {
-            multi_funtion_processor(element);
+            std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+            const auto & bloom_filters = element.set_bloom_filters[0];
+
+            for (size_t row = 0; row < bloom_filters.size(); ++row)
+                result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+            rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
         }
         else if (element.function == RPNElement::FUNCTION_MATCH)
         {
             if (!element.set_bloom_filters.empty())
             {
-                multi_funtion_processor(element);
+                /// Alternative substrings
+                std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+                const auto & bloom_filters = element.set_bloom_filters[0];
+
+                for (size_t row = 0; row < bloom_filters.size(); ++row)
+                    result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+                rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
             }
-            // If set_bloom_filters is not empty means we got alternative substring
             else if (element.bloom_filter)
             {
+                /// Required substrings
                 rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
             }
         }
@@ -535,38 +536,6 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         return true;
     }
 
-    if (function_name == "match")
-    {
-        out.key_column = *key_index;
-        out.function = RPNElement::FUNCTION_MATCH;
-        out.bloom_filter = std::make_unique<BloomFilter>(params);
-
-        auto & string_view = const_value.get<String>();
-        String required_substring;
-        std::vector<String> alternatives;
-        bool tmp_var;
-        OptimizedRegularExpression::analyze(string_view, required_substring, tmp_var, tmp_var, alternatives);
-
-        if (required_substring.empty() && alternatives.empty())
-            return false;
-
-        if (!alternatives.empty())
-        {
-            std::vector<std::vector<BloomFilter>> bloom_filters;
-            bloom_filters.emplace_back();
-            for (const auto & alternative : alternatives)
-            {
-                bloom_filters.back().emplace_back(params);
-                token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
-            }
-            out.set_bloom_filters = std::move(bloom_filters);
-        }
-        else
-           token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
-
-        return true;
-    }
-
     else if (function_name == "has")
     {
         out.key_column = *key_index;
@@ -654,6 +623,39 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         out.set_bloom_filters = std::move(bloom_filters);
         return true;
     }
+    else if (function_name == "match")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_MATCH;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+
+        auto & value = const_value.get<String>();
+        String required_substring;
+        bool dummy_is_trivial, dummy_required_substring_is_prefix;
+        std::vector<String> alternatives;
+        OptimizedRegularExpression::analyze(value, required_substring, dummy_is_trivial, dummy_required_substring_is_prefix, alternatives);
+
+        if (required_substring.empty() && alternatives.empty())
+            return false;
+
+        /// out.set_bloom_filters means alternatives exist
+        /// out.bloom_filter means required_substring exists
+        if (!alternatives.empty())
+        {
+            std::vector<std::vector<BloomFilter>> bloom_filters;
+            bloom_filters.emplace_back();
+            for (const auto & alternative : alternatives)
+            {
+                bloom_filters.back().emplace_back(params);
+                token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
+            }
+            out.set_bloom_filters = std::move(bloom_filters);
+        }
+        else
+           token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
+
+        return true;
+    }
 
     return false;
 }
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
new file mode 100644
index 00000000000..41ca02e3877
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
@@ -0,0 +1,26 @@
+1	Hello ClickHouse
+2	Hello World
+1	Hello ClickHouse
+2	Hello World
+          Granules: 6/6
+          Granules: 2/6
+          Granules: 6/6
+          Granules: 2/6
+---
+1	Hello ClickHouse
+2	Hello World
+6	World Champion
+1	Hello ClickHouse
+2	Hello World
+6	World Champion
+          Granules: 6/6
+          Granules: 3/6
+          Granules: 6/6
+          Granules: 3/6
+---
+5	OLAP Database
+5	OLAP Database
+          Granules: 6/6
+          Granules: 1/6
+          Granules: 6/6
+          Granules: 1/6
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
new file mode 100644
index 00000000000..7378df41b8d
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
@@ -0,0 +1,107 @@
+DROP TABLE IF EXISTS tokenbf_tab;
+DROP TABLE IF EXISTS ngrambf_tab;
+
+CREATE TABLE tokenbf_tab
+(
+    id UInt32,
+    str String,
+    INDEX idx str TYPE tokenbf_v1(256, 2, 0)
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 1;
+
+CREATE TABLE ngrambf_tab
+(
+    id UInt32,
+    str String,
+    INDEX idx str TYPE ngrambf_v1(3, 256, 2, 0)
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 1;
+
+INSERT INTO tokenbf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
+INSERT INTO ngrambf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
+
+SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
+SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
+
+-- Skip 4/6 granules
+-- Required string: 'Hello '
+-- Alternatives: 'Hello ClickHouse', 'Hello World'
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT '---';
+
+SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
+SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
+
+-- Skip 3/6 granules
+-- Required string: -
+-- Alternatives: 'ClickHouse', 'World'
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT '---';
+
+SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
+SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
+
+-- Skip 5/6 granules
+-- Required string: 'OLAP'
+-- Alternatives: -
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+DROP TABLE tokenbf_tab;
+DROP TABLE ngrambf_tab;
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
deleted file mode 100644
index d02011eb2a1..00000000000
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.reference
+++ /dev/null
@@ -1,10 +0,0 @@
-      Granules: 5/5
-      Granules: 2/5
-
-
-      Granules: 5/5
-      Granules: 2/5
-
-
-      Granules: 5/5
-      Granules: 1/5
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
deleted file mode 100644
index b48eb45c0d0..00000000000
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.sql
+++ /dev/null
@@ -1,68 +0,0 @@
--- Tags: no-parallel
-
-DROP DATABASE IF EXISTS test_tokenbf_match;
-
-CREATE DATABASE test_tokenbf_match;
-
-CREATE TABLE test_tokenbf_match.test_tokenbf 
-(
-    `id` UInt32,
-    `str` String,
-    INDEX str_idx str TYPE tokenbf_v1(256, 2, 0) GRANULARITY 1
-)
-ENGINE = MergeTree
-ORDER BY id
-SETTINGS index_granularity = 1;
- 
-INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
-
---SKIP 3 GRANUS
---Required String: Hello
---Alternative String: Hello ClickHouse
---Alternative String: Hello World
-SELECT 
-  *
-FROM
-(
-    EXPLAIN indexes=1
-    SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)')
-)
-WHERE
-  explain like '%Granules%';
-
-
-SELECT '';
-SELECT '';
-
-
---SKIP 3 GRANUS
---No Required String
---Alternative String: ClickHouse
---Alternative String: World
-SELECT
-  *
-FROM
-(
-    EXPLAIN indexes = 1
-    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)')
-)
-WHERE
-  explain like '%Granules%';
-
-SELECT '';
-SELECT '';
-
---SKIP 4 GRANUS
---Required String: OLAP
---No Alternative String
-SELECT
-  *
-FROM
-(
-    EXPLAIN indexes = 1
-    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*')
-)
-WHERE
-  explain like '%Granules%';
-
-DROP DATABASE IF EXISTS test_tokenbf_match;

From 267b35ff671ec6c4c8340457ff2527829a00e52b Mon Sep 17 00:00:00 2001
From: skyoct <skyoct@163.com>
Date: Tue, 19 Dec 2023 12:24:32 +0000
Subject: [PATCH 046/204] feat: add server setting config

---
 src/Core/ServerSettings.cpp                   |   5 +-
 src/Core/ServerSettings.h                     | 197 ++++++++++--------
 .../System/StorageSystemServerSettings.cpp    |  61 ++----
 3 files changed, 130 insertions(+), 133 deletions(-)

diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp
index fbf86d3e9ad..93de0a6d5c3 100644
--- a/src/Core/ServerSettings.cpp
+++ b/src/Core/ServerSettings.cpp
@@ -4,10 +4,13 @@
 namespace DB
 {
 
-IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
+IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WITH_FLAG)
 
 void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config)
 {
+
+    SERVER_SETTINGS(SET_RUNTIME_RELOAD_, "", "")
+    
     // settings which can be loaded from the the default profile, see also MAKE_DEPRECATED_BY_SERVER_CONFIG in src/Core/Settings.h
     std::unordered_set<std::string> settings_from_profile_allowlist = {
         "background_pool_size",
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index de0fff35389..a250cdd2020 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -13,98 +13,119 @@ class AbstractConfiguration;
 namespace DB
 {
 
-#define SERVER_SETTINGS(M, ALIAS) \
-    M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
-    M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
-    M(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \
-    M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
-    M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
-    M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \
-    M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
-    M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
-    M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
-    M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
-    M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
-    M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
-    M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
-    M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
-    M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
-    M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
-    M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
-    M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
-    M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
-    M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
-    M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
-    M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
-    M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
-    M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
-    M(Int32, max_connections, 1024, "Max server connections.", 0) \
-    M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
-    M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
-    M(String, default_database, "default", "Default database name.", 0) \
-    M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
-    M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \
-    M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
-    M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
-    M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \
-    M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \
-    M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \
-    M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \
-    M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
-    M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
-    M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
-    \
-    M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \
-    M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \
-    M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \
-    \
-    M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
-    M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
-    M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
-    M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
-    M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
-    M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
-    M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
-    M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \
-    M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \
-    M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \
-    M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \
-    M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \
-    M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \
-    M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
-    \
-    M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
-    M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
-    M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \
-    \
-    M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
-    M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
-    M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
-    M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
-    \
-    M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \
-    M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \
-    M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
-    M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
-    M(UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
-    M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
-    M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
-    M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
-    M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
-    M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
-    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
-    \
-    M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
-    M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
-    M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
-    M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
-    M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
-    M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
+enum class RuntimeReloadType  /// Per-setting tag: passed as the last argument of every SERVER_SETTINGS entry.
+{
+    FULL,           /// NOTE(review): presumably the value may be changed freely by a config reload — confirm.
+    ONLY_INCREASE,  /// NOTE(review): presumably a config reload may only raise the value — confirm.
+    NO,             /// NOTE(review): presumably a changed value is not picked up without a restart — confirm.
+};
+
+#define SET_RUNTIME_RELOAD_(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) /* Per-entry expander for SERVER_SETTINGS: inserts (stringized NAME, RUNTIME_RELOAD) into this->runtime_reload_map, so it must be expanded inside a member function of a class that has that map. The remaining arguments are ignored. */ \
+    this->runtime_reload_map.insert(std::make_pair(#NAME, RUNTIME_RELOAD));
+
+#define M_WITH_FLAG_(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) /* Per-entry expander for SERVER_SETTINGS: drops RUNTIME_RELOAD and forwards the other five arguments to M. */ \
+    M(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
+
+
+#define SERVER_SETTINGS(M_WITH_FLAG, M, ALIAS) /* List of server settings. Every entry expands to M_WITH_FLAG(M, type, name, default, description, flags, RuntimeReloadType); M is only forwarded to M_WITH_FLAG, and ALIAS is not used by any entry below. */ \
+    M_WITH_FLAG(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Int32, max_connections, 1024, "Max server connections.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, default_database, "default", "Default database name.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, tmp_policy, "", "Policy for storage with temporary data.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0, RuntimeReloadType::NO) \
+    \
+    M_WITH_FLAG(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    \
+    M_WITH_FLAG(M, Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0, RuntimeReloadType::NO) \
+    \
+    M_WITH_FLAG(M, Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0, RuntimeReloadType::NO) \
+    \
+    M_WITH_FLAG(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0, RuntimeReloadType::NO) \
+    \
+    M_WITH_FLAG(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::NO) \
+    \
+    M_WITH_FLAG(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0, RuntimeReloadType::NO) \
+
+
+#define SERVER_SETTINGS_WITH_FLAG(M, ALIAS) /* Two-argument (M, ALIAS) form of SERVER_SETTINGS, as passed to DECLARE_SETTINGS_TRAITS and IMPLEMENT_SETTINGS_TRAITS; each entry goes through M_WITH_FLAG_, which discards the RuntimeReloadType argument. */ \
+    SERVER_SETTINGS(M_WITH_FLAG_, M, ALIAS)
+
+DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WITH_FLAG)
 
-DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
 
 struct ServerSettings : public BaseSettings<ServerSettingsTraits>
 {
+    std::unordered_map<std::string, RuntimeReloadType> runtime_reload_map; /// Setting name -> reload tag; populated in loadSettingsFromConfig() by expanding SERVER_SETTINGS(SET_RUNTIME_RELOAD_, ...).
+    
     void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
 };
 
diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp
index ded5d8e8fae..796d919253f 100644
--- a/src/Storages/System/StorageSystemServerSettings.cpp
+++ b/src/Storages/System/StorageSystemServerSettings.cpp
@@ -20,13 +20,6 @@ namespace CurrentMetrics
 namespace DB
 {
 
-enum class RuntimeReloadType
-{
-    FULL,
-    ONLY_INCREASE,
-    NO,
-};
-
 static std::vector<std::pair<String, Int8>> getTypeEnumsAndValues()
 {
     return std::vector<std::pair<String, Int8>>{
@@ -61,50 +54,29 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
 void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const
 {
     // Server settings that have been reloaded from the config file.
-    // std::unordered_map<std::string, std::string> updated = {
-    //     {"max_server_memory_usage", std::to_string(total_memory_tracker.getHardLimit())},
-    //     {"allow_use_jemalloc_memory", std::to_string(total_memory_tracker.getAllowUseJemallocMmemory())},
+    std::unordered_map<std::string, std::string> updated = {
+        {"max_server_memory_usage", std::to_string(total_memory_tracker.getHardLimit())},
+        {"allow_use_jemalloc_memory", std::to_string(total_memory_tracker.getAllowUseJemallocMmemory())},
 
-    //     {"max_table_size_to_drop", std::to_string(context->getMaxTableSizeToDrop())},
-    //     {"max_partition_size_to_drop", std::to_string(context->getMaxPartitionSizeToDrop())},
+        {"max_table_size_to_drop", std::to_string(context->getMaxTableSizeToDrop())},
+        {"max_partition_size_to_drop", std::to_string(context->getMaxPartitionSizeToDrop())},
 
-    //     {"max_concurrent_queries", std::to_string(context->getProcessList().getMaxSize())},
-    //     {"max_concurrent_insert_queries", std::to_string(context->getProcessList().getMaxInsertQueriesAmount())},
-    //     {"max_concurrent_select_queries", std::to_string(context->getProcessList().getMaxSelectQueriesAmount())},
+        {"max_concurrent_queries", std::to_string(context->getProcessList().getMaxSize())},
+        {"max_concurrent_insert_queries", std::to_string(context->getProcessList().getMaxInsertQueriesAmount())},
+        {"max_concurrent_select_queries", std::to_string(context->getProcessList().getMaxSelectQueriesAmount())},
 
-    //     {"background_pool_size", std::to_string(context->getMergeMutateExecutor()->getMaxThreads())},
-    //     {"background_move_pool_size", std::to_string(context->getMovesExecutor()->getMaxThreads())},
-    //     {"background_fetches_pool_size", std::to_string(context->getFetchesExecutor()->getMaxThreads())},
-    //     {"background_common_pool_size", std::to_string(context->getCommonExecutor()->getMaxThreads())},
+        {"background_pool_size", std::to_string(context->getMergeMutateExecutor()->getMaxThreads())},
+        {"background_move_pool_size", std::to_string(context->getMovesExecutor()->getMaxThreads())},
+        {"background_fetches_pool_size", std::to_string(context->getFetchesExecutor()->getMaxThreads())},
+        {"background_common_pool_size", std::to_string(context->getCommonExecutor()->getMaxThreads())},
 
-    //     {"background_buffer_flush_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize))},
-    //     {"background_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize))},
-    //     {"background_message_broker_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize))},
-    //     {"background_distributed_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundDistributedSchedulePoolSize))}
-    // };
-
-    std::unordered_map<std::string, UpdatedData> updated = {
-        {"max_server_memory_usage", {std::to_string(total_memory_tracker.getHardLimit()), RuntimeReloadType::FULL}},
-        {"allow_use_jemalloc_memory", {std::to_string(total_memory_tracker.getAllowUseJemallocMmemory()), RuntimeReloadType::FULL}},
-
-        {"max_table_size_to_drop", {std::to_string(context->getMaxTableSizeToDrop()), RuntimeReloadType::FULL}},
-        {"max_partition_size_to_drop", {std::to_string(context->getMaxPartitionSizeToDrop()), RuntimeReloadType::FULL}},
-
-        {"max_concurrent_queries", {std::to_string(context->getProcessList().getMaxSize()), RuntimeReloadType::FULL}},
-        {"max_concurrent_insert_queries", {std::to_string(context->getProcessList().getMaxInsertQueriesAmount()), RuntimeReloadType::FULL}},
-        {"max_concurrent_select_queries", {std::to_string(context->getProcessList().getMaxSelectQueriesAmount()), RuntimeReloadType::FULL}},
-
-        {"background_pool_size", {std::to_string(context->getMergeMutateExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
-        {"background_move_pool_size", {std::to_string(context->getMovesExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
-        {"background_fetches_pool_size", {std::to_string(context->getFetchesExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
-        {"background_common_pool_size", {std::to_string(context->getCommonExecutor()->getMaxThreads()), RuntimeReloadType::ONLY_INCREASE}},
-
-        {"background_buffer_flush_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize)), RuntimeReloadType::ONLY_INCREASE}},
+        {"background_buffer_flush_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize))},
         {"background_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize))},
         {"background_message_broker_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize))},
         {"background_distributed_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundDistributedSchedulePoolSize))}
     };
 
+
     const auto & config = context->getConfigRef();
     ServerSettings settings;
     settings.loadSettingsFromConfig(config);
@@ -113,16 +85,17 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
     {
         const auto & setting_name = setting.getName();
         const auto & it = updated.find(setting_name);
+        const auto & runtime_reload_it = settings.runtime_reload_map.find(setting_name);
 
         res_columns[0]->insert(setting_name);
-        res_columns[1]->insert((it != updated.end()) ? it->second.value: setting.getValueString());
+        res_columns[1]->insert((it != updated.end()) ? it->second: setting.getValueString());
         res_columns[2]->insert(setting.getDefaultValueString());
         res_columns[3]->insert(setting.isValueChanged());
         res_columns[4]->insert(setting.getDescription());
         res_columns[5]->insert(setting.getTypeName());
         res_columns[6]->insert(setting.isObsolete());
         res_columns[7]->insert((it != updated.end()) ? true : false);
-        res_columns[8]->insert((it != updated.end()) ? static_cast<Int8>(it->second.type): static_cast<Int8>(RuntimeReloadType::NO));
+        res_columns[8]->insert(static_cast<Int8>(runtime_reload_it != settings.runtime_reload_map.end() ? runtime_reload_it->second: RuntimeReloadType::NO));
     }
 }
 

From d557c0946a02f742ea478535f2d8ec3a0b7a9dbd Mon Sep 17 00:00:00 2001
From: skyoct <skyoct@163.com>
Date: Tue, 19 Dec 2023 14:12:04 +0000
Subject: [PATCH 047/204] fix server setting runtime reload type

---
 src/Core/ServerSettings.h | 77 ++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 38 deletions(-)

diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 39b7d523adf..46b83bed74b 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -17,6 +17,7 @@ enum class RuntimeReloadType
 {
     FULL,
     ONLY_INCREASE,
+    ONLY_DECREASE,
     NO,
 };
 
@@ -28,27 +29,27 @@ enum class RuntimeReloadType
 
 
 #define SERVER_SETTINGS(M_WITH_FLAG, M, ALIAS) \
-    M_WITH_FLAG(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::FULL) \
     M_WITH_FLAG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
     M_WITH_FLAG(M, UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0, RuntimeReloadType::NO) \
@@ -60,17 +61,17 @@ enum class RuntimeReloadType
     M_WITH_FLAG(M, UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::FULL) \
     M_WITH_FLAG(M, UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0, RuntimeReloadType::NO) \
     \
-    M_WITH_FLAG(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    M_WITH_FLAG(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
     \
     M_WITH_FLAG(M, Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
@@ -91,27 +92,27 @@ enum class RuntimeReloadType
     M_WITH_FLAG(M, Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0, RuntimeReloadType::NO) \
     \
-    M_WITH_FLAG(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \
-    M_WITH_FLAG(M, UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \
-    M_WITH_FLAG(M, UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \
-    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
-    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
+    M_WITH_FLAG(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0, RuntimeReloadType::ONLY_DECREASE) \
+    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0, RuntimeReloadType::ONLY_DECREASE) \
     \
-    M_WITH_FLAG(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
-    M_WITH_FLAG(M, UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
-    M_WITH_FLAG(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
+    M_WITH_FLAG(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::NO, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    M_WITH_FLAG(M, UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
+    M_WITH_FLAG(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0, RuntimeReloadType::NO) \
     M_WITH_FLAG(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::NO) \
     \
     M_WITH_FLAG(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::NO) \

From 7cbb4ad7246b9796fad8c06e915fc394cea4ca65 Mon Sep 17 00:00:00 2001
From: skyoct <skyoct@163.com>
Date: Tue, 19 Dec 2023 15:20:32 +0000
Subject: [PATCH 048/204] fix macro name

---
 src/Core/ServerSettings.cpp |   4 +-
 src/Core/ServerSettings.h   | 184 ++++++++++++++++++------------------
 2 files changed, 94 insertions(+), 94 deletions(-)

diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp
index 93de0a6d5c3..77010426c4a 100644
--- a/src/Core/ServerSettings.cpp
+++ b/src/Core/ServerSettings.cpp
@@ -4,12 +4,12 @@
 namespace DB
 {
 
-IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WITH_FLAG)
+IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WRAP)
 
 void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config)
 {
 
-    SERVER_SETTINGS(SET_RUNTIME_RELOAD_, "", "")
+    SERVER_SETTINGS(SET_RUNTIME_RELOAD, "", "")
     
     // settings which can be loaded from the the default profile, see also MAKE_DEPRECATED_BY_SERVER_CONFIG in src/Core/Settings.h
     std::unordered_set<std::string> settings_from_profile_allowlist = {
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 46b83bed74b..934f6c62337 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -21,112 +21,112 @@ enum class RuntimeReloadType
     NO,
 };
 
-#define SET_RUNTIME_RELOAD_(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) \
+#define SET_RUNTIME_RELOAD(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) \
     this->runtime_reload_map.insert(std::make_pair(#NAME, RUNTIME_RELOAD));
 
-#define M_WITH_FLAG_(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) \
+#define M_WRAP(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) \
     M(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
 
 
-#define SERVER_SETTINGS(M_WITH_FLAG, M, ALIAS) \
-    M_WITH_FLAG(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Int32, max_connections, 1024, "Max server connections.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, default_database, "default", "Default database name.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, tmp_policy, "", "Policy for storage with temporary data.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0, RuntimeReloadType::NO) \
+#define SERVER_SETTINGS(MW, M, ALIAS) \
+    MW(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0, RuntimeReloadType::NO) \
+    MW(M, Int32, max_connections, 1024, "Max server connections.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0, RuntimeReloadType::NO) \
+    MW(M, String, default_database, "default", "Default database name.", 0, RuntimeReloadType::NO) \
+    MW(M, String, tmp_policy, "", "Policy for storage with temporary data.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0, RuntimeReloadType::NO) \
+    MW(M, String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    MW(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    MW(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::FULL) \
+    MW(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0, RuntimeReloadType::NO) \
     \
-    M_WITH_FLAG(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
     \
-    M_WITH_FLAG(M, Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0, RuntimeReloadType::NO) \
+    MW(M, Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0, RuntimeReloadType::NO) \
+    MW(M, String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0, RuntimeReloadType::NO) \
+    MW(M, Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    MW(M, String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0, RuntimeReloadType::NO) \
+    MW(M, Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    MW(M, String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0, RuntimeReloadType::NO) \
+    MW(M, Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    MW(M, String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0, RuntimeReloadType::NO) \
+    MW(M, Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0, RuntimeReloadType::NO) \
     \
-    M_WITH_FLAG(M, Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0, RuntimeReloadType::NO) \
+    MW(M, Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0, RuntimeReloadType::NO) \
     \
-    M_WITH_FLAG(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0, RuntimeReloadType::ONLY_DECREASE) \
-    M_WITH_FLAG(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0, RuntimeReloadType::ONLY_DECREASE) \
+    MW(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0, RuntimeReloadType::ONLY_DECREASE) \
+    MW(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0, RuntimeReloadType::ONLY_DECREASE) \
     \
-    M_WITH_FLAG(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::NO, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    M_WITH_FLAG(M, UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
-    M_WITH_FLAG(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::ONLY_INCREASE) \
+    MW(M, UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
+    MW(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::NO) \
     \
-    M_WITH_FLAG(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0, RuntimeReloadType::NO) \
-    M_WITH_FLAG(M, Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::NO) \
+    MW(M, Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0, RuntimeReloadType::NO) \
 
 
-#define SERVER_SETTINGS_WITH_FLAG(M, ALIAS) \
-    SERVER_SETTINGS(M_WITH_FLAG_, M, ALIAS)
+#define SERVER_SETTINGS_WRAP(M, ALIAS) \
+    SERVER_SETTINGS(M_WRAP, M, ALIAS)
 
-DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WITH_FLAG)
+DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WRAP)
 
 
 struct ServerSettings : public BaseSettings<ServerSettingsTraits>

From 00576d2092577ddf4822eb076f61253012e80455 Mon Sep 17 00:00:00 2001
From: skyoct <skyoct@163.com>
Date: Tue, 19 Dec 2023 15:23:20 +0000
Subject: [PATCH 049/204] Add OnlyDecrease to the runtime reload type enum values

---
 src/Storages/System/StorageSystemServerSettings.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp
index a3b132f4152..5640207f290 100644
--- a/src/Storages/System/StorageSystemServerSettings.cpp
+++ b/src/Storages/System/StorageSystemServerSettings.cpp
@@ -25,6 +25,7 @@ static std::vector<std::pair<String, Int8>> getTypeEnumsAndValues()
     return std::vector<std::pair<String, Int8>>{
         {"Full",            static_cast<Int8>(RuntimeReloadType::FULL)},
         {"OnlyIncrease",    static_cast<Int8>(RuntimeReloadType::ONLY_INCREASE)},
+        {"OnlyDecrease",    static_cast<Int8>(RuntimeReloadType::ONLY_DECREASE)},
         {"No",              static_cast<Int8>(RuntimeReloadType::NO)},
     };
 }

From 342a4f7b96cce6d8375dfd5ac95b5581c5633afb Mon Sep 17 00:00:00 2001
From: skyoct <skyoct@163.com>
Date: Thu, 21 Dec 2023 14:25:18 +0000
Subject: [PATCH 050/204] add doc and opt some code

---
 .../system-tables/server_settings.md          |  37 ++--
 src/Core/ServerSettings.cpp                   |   1 -
 src/Core/ServerSettings.h                     | 190 +++++++++---------
 .../System/StorageSystemServerSettings.cpp    |  28 +--
 4 files changed, 125 insertions(+), 131 deletions(-)

diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md
index 7efe605ccef..6374b2d02a2 100644
--- a/docs/en/operations/system-tables/server_settings.md
+++ b/docs/en/operations/system-tables/server_settings.md
@@ -14,6 +14,12 @@ Columns:
 - `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
 - `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
 - `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
+- `is_hot_reloadable` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the setting supports hot reload.
+- `runtime_reload` ([Enum8](../../sql-reference/data-types/enum.md)) — Hot reload type of the setting. Possible values:
+    - `Yes`
+    - `OnlyIncrease`
+    - `OnlyDecrease`
+    - `No`
 - `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) - Shows whether a setting is obsolete.
 
 **Example**
@@ -27,22 +33,21 @@ WHERE name LIKE '%thread_pool%'
 ```
 
 ``` text
-┌─name────────────────────────────────────────_─value─_─default─_─changed─_─description──────────────────────────────────────────────────────────────────────────────────────────────────────
-───────────────────────────────────_─type───_─is_obsolete─┐
-│ max_thread_pool_size                        │ 10000 │ 10000   │       1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations.                           │ UInt64 │           0 │
-│ max_thread_pool_free_size                   │ 1000  │ 1000    │       0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │           0 │
-│ thread_pool_queue_size                      │ 10000 │ 10000   │       0 │ The maximum number of tasks that will be placed in a queue and wait for execution.                                                                  │ UInt64 │           0 │
-│ max_io_thread_pool_size                     │ 100   │ 100     │       0 │ The maximum number of threads that would be used for IO operations                                                                                  │ UInt64 │           0 │
-│ max_io_thread_pool_free_size                │ 0     │ 0       │       0 │ Max free size for IO thread pool.                                                                                                                   │ UInt64 │           0 │
-│ io_thread_pool_queue_size                   │ 10000 │ 10000   │       0 │ Queue size for IO thread pool.                                                                                                                      │ UInt64 │           0 │
-│ max_active_parts_loading_thread_pool_size   │ 64    │ 64      │       0 │ The number of threads to load active set of data parts (Active ones) at startup.                                                                    │ UInt64 │           0 │
-│ max_outdated_parts_loading_thread_pool_size │ 32    │ 32      │       0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup.                                                                │ UInt64 │           0 │
-│ max_parts_cleaning_thread_pool_size         │ 128   │ 128     │       0 │ The number of threads for concurrent removal of inactive data parts.                                                                                │ UInt64 │           0 │
-│ max_backups_io_thread_pool_size             │ 1000  │ 1000    │       0 │ The maximum number of threads that would be used for IO operations for BACKUP queries                                                               │ UInt64 │           0 │
-│ max_backups_io_thread_pool_free_size        │ 0     │ 0       │       0 │ Max free size for backups IO thread pool.                                                                                                           │ UInt64 │           0 │
-│ backups_io_thread_pool_queue_size           │ 0     │ 0       │       0 │ Queue size for backups IO thread pool.                                                                                                              │ UInt64 │           0 │
-└─────────────────────────────────────────────┴───────┴─────────┴─────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-───────────────────────────────────┴────────┴─────────────┘
+┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─is_hot_reloadable─┬─runtime_reload─┬─is_obsolete─┐
+│ max_thread_pool_size                        │ 10000 │ 10000   │       0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations.                           │ UInt64 │                 0 │ No             │           0 │
+│ max_thread_pool_free_size                   │ 1000  │ 1000    │       0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │                 0 │ No             │           0 │
+│ thread_pool_queue_size                      │ 10000 │ 10000   │       0 │ The maximum number of tasks that will be placed in a queue and wait for execution.                                                                  │ UInt64 │                 0 │ No             │           0 │
+│ max_io_thread_pool_size                     │ 100   │ 100     │       0 │ The maximum number of threads that would be used for IO operations                                                                                  │ UInt64 │                 0 │ Yes            │           0 │
+│ max_io_thread_pool_free_size                │ 0     │ 0       │       0 │ Max free size for IO thread pool.                                                                                                                   │ UInt64 │                 0 │ Yes            │           0 │
+│ io_thread_pool_queue_size                   │ 10000 │ 10000   │       0 │ Queue size for IO thread pool.                                                                                                                      │ UInt64 │                 0 │ Yes            │           0 │
+│ max_active_parts_loading_thread_pool_size   │ 64    │ 64      │       0 │ The number of threads to load active set of data parts (Active ones) at startup.                                                                    │ UInt64 │                 0 │ Yes            │           0 │
+│ max_outdated_parts_loading_thread_pool_size │ 32    │ 32      │       0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup.                                                                │ UInt64 │                 0 │ Yes            │           0 │
+│ max_parts_cleaning_thread_pool_size         │ 128   │ 128     │       0 │ The number of threads for concurrent removal of inactive data parts.                                                                                │ UInt64 │                 0 │ Yes            │           0 │
+│ max_backups_io_thread_pool_size             │ 1000  │ 1000    │       0 │ The maximum number of threads that would be used for IO operations for BACKUP queries                                                               │ UInt64 │                 0 │ Yes            │           0 │
+│ max_backups_io_thread_pool_free_size        │ 0     │ 0       │       0 │ Max free size for backups IO thread pool.                                                                                                           │ UInt64 │                 0 │ Yes            │           0 │
+│ backups_io_thread_pool_queue_size           │ 0     │ 0       │       0 │ Queue size for backups IO thread pool.                                                                                                              │ UInt64 │                 0 │ Yes            │           0 │
+└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴───────────────────┴────────────────┴─────────────┘
+
 ```
 
 Using of `WHERE changed` can be useful, for example, when you want to check 
diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp
index 77010426c4a..7f48226c213 100644
--- a/src/Core/ServerSettings.cpp
+++ b/src/Core/ServerSettings.cpp
@@ -10,7 +10,6 @@ void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfigurat
 {
 
     SERVER_SETTINGS(SET_RUNTIME_RELOAD, "", "")
-    
     // settings which can be loaded from the the default profile, see also MAKE_DEPRECATED_BY_SERVER_CONFIG in src/Core/Settings.h
     std::unordered_set<std::string> settings_from_profile_allowlist = {
         "background_pool_size",
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 10fac354b5c..3215a35b483 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -9,16 +9,15 @@ namespace Poco::Util
 {
 class AbstractConfiguration;
 }
-
 namespace DB
 {
 
 enum class RuntimeReloadType
 {
-    FULL,
-    ONLY_INCREASE,
-    ONLY_DECREASE,
-    NO,
+    Yes,
+    OnlyIncrease,
+    OnlyDecrease,
+    No,
 };
 
 #define SET_RUNTIME_RELOAD(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) \
@@ -29,102 +28,102 @@ enum class RuntimeReloadType
 
 
 #define SERVER_SETTINGS(MW, M, ALIAS) \
-    MW(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::NO) \
-    MW(M, Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0, RuntimeReloadType::NO) \
-    MW(M, Int32, max_connections, 1024, "Max server connections.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0, RuntimeReloadType::NO) \
-    MW(M, String, default_database, "default", "Default database name.", 0, RuntimeReloadType::NO) \
-    MW(M, String, tmp_policy, "", "Policy for storage with temporary data.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0, RuntimeReloadType::NO) \
-    MW(M, String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    MW(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    MW(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::FULL) \
-    MW(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0, RuntimeReloadType::NO) \
-    MW(M, Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::No) \
+    MW(M, Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0, RuntimeReloadType::No) \
+    MW(M, Int32, max_connections, 1024, "Max server connections.", 0, RuntimeReloadType::No) \
+    MW(M, UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0, RuntimeReloadType::No) \
+    MW(M, UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0, RuntimeReloadType::No) \
+    MW(M, String, default_database, "default", "Default database name.", 0, RuntimeReloadType::No) \
+    MW(M, String, tmp_policy, "", "Policy for storage with temporary data.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0, RuntimeReloadType::No) \
+    MW(M, String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
+    MW(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
+    MW(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::Yes) \
+    MW(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0, RuntimeReloadType::No) \
+    MW(M, Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0, RuntimeReloadType::No) \
     \
-    MW(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::FULL) \
+    MW(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
     \
-    MW(M, Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0, RuntimeReloadType::NO) \
-    MW(M, String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0, RuntimeReloadType::NO) \
-    MW(M, Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    MW(M, String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0, RuntimeReloadType::NO) \
-    MW(M, Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    MW(M, String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0, RuntimeReloadType::NO) \
-    MW(M, Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    MW(M, String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0, RuntimeReloadType::NO) \
-    MW(M, Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0, RuntimeReloadType::NO) \
+    MW(M, Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0, RuntimeReloadType::No) \
+    MW(M, String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0, RuntimeReloadType::No) \
+    MW(M, Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
+    MW(M, String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0, RuntimeReloadType::No) \
+    MW(M, Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
+    MW(M, String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0, RuntimeReloadType::No) \
+    MW(M, Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
+    MW(M, String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0, RuntimeReloadType::No) \
+    MW(M, Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0, RuntimeReloadType::No) \
     \
-    MW(M, Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0, RuntimeReloadType::NO) \
-    MW(M, Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0, RuntimeReloadType::NO) \
+    MW(M, Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0, RuntimeReloadType::No) \
+    MW(M, Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0, RuntimeReloadType::No) \
+    MW(M, UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0, RuntimeReloadType::No) \
     \
-    MW(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0, RuntimeReloadType::ONLY_DECREASE) \
-    MW(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0, RuntimeReloadType::ONLY_DECREASE) \
+    MW(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0, RuntimeReloadType::OnlyDecrease) \
+    MW(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0, RuntimeReloadType::OnlyDecrease) \
     \
-    MW(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::ONLY_INCREASE) \
-    MW(M, UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
-    MW(M, UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0, RuntimeReloadType::FULL) \
-    MW(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0, RuntimeReloadType::NO) \
-    MW(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::NO) \
+    MW(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::OnlyIncrease) \
+    MW(M, UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0, RuntimeReloadType::Yes) \
+    MW(M, UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0, RuntimeReloadType::Yes) \
+    MW(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0, RuntimeReloadType::No) \
+    MW(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::No) \
     \
-    MW(M, Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0, RuntimeReloadType::NO) \
-    MW(M, Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0, RuntimeReloadType::NO) \
-    MW(M, Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0, RuntimeReloadType::NO) \
-    MW(M, Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::NO) \
-    MW(M, Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
-    MW(M, UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
-    MW(M, Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0, RuntimeReloadType::NO) \
-    MW(M, Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0, RuntimeReloadType::NO) \
+    MW(M, Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0, RuntimeReloadType::No) \
+    MW(M, Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0, RuntimeReloadType::No) \
+    MW(M, Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0, RuntimeReloadType::No) \
+    MW(M, Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::No) \
+    MW(M, Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::No) \
+    MW(M, UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::No) \
+    MW(M, Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0, RuntimeReloadType::No) \
+    MW(M, Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0, RuntimeReloadType::No) \
 
 
 #define SERVER_SETTINGS_WRAP(M, ALIAS) \
@@ -136,7 +135,6 @@ DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WRAP)
 struct ServerSettings : public BaseSettings<ServerSettingsTraits>
 {
     std::unordered_map<std::string, RuntimeReloadType> runtime_reload_map;
-    
     void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
 };
 
diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp
index 5640207f290..bcfca59618a 100644
--- a/src/Storages/System/StorageSystemServerSettings.cpp
+++ b/src/Storages/System/StorageSystemServerSettings.cpp
@@ -20,23 +20,16 @@ namespace CurrentMetrics
 namespace DB
 {
 
-static std::vector<std::pair<String, Int8>> getTypeEnumsAndValues()
+static std::vector<std::pair<String, Int8>> getRuntimeReloadEnumAndValues()
 {
     return std::vector<std::pair<String, Int8>>{
-        {"Full",            static_cast<Int8>(RuntimeReloadType::FULL)},
-        {"OnlyIncrease",    static_cast<Int8>(RuntimeReloadType::ONLY_INCREASE)},
-        {"OnlyDecrease",    static_cast<Int8>(RuntimeReloadType::ONLY_DECREASE)},
-        {"No",              static_cast<Int8>(RuntimeReloadType::NO)},
+        {"Yes",            static_cast<Int8>(RuntimeReloadType::Yes)},
+        {"OnlyIncrease",    static_cast<Int8>(RuntimeReloadType::OnlyIncrease)},
+        {"OnlyDecrease",    static_cast<Int8>(RuntimeReloadType::OnlyDecrease)},
+        {"No",              static_cast<Int8>(RuntimeReloadType::No)},
     };
 }
 
-struct UpdatedData {
-    std::string value;
-    RuntimeReloadType type;
-};
-
-
-
 NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
 {
     return {
@@ -46,9 +39,9 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
         {"changed", std::make_shared<DataTypeUInt8>()},
         {"description", std::make_shared<DataTypeString>()},
         {"type", std::make_shared<DataTypeString>()},
-        {"is_obsolete", std::make_shared<DataTypeUInt8>()},
         {"is_hot_reloadable", std::make_shared<DataTypeUInt8>()},
-        {"runtime_reload", std::make_shared<DataTypeEnum8>(getTypeEnumsAndValues())}
+        {"runtime_reload", std::make_shared<DataTypeEnum8>(getRuntimeReloadEnumAndValues())},
+        {"is_obsolete", std::make_shared<DataTypeUInt8>()}
     };
 }
 
@@ -88,7 +81,6 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
     {
         const auto & setting_name = setting.getName();
         const auto & it = updated.find(setting_name);
-        const auto & runtime_reload_it = settings.runtime_reload_map.find(setting_name);
 
         res_columns[0]->insert(setting_name);
         res_columns[1]->insert((it != updated.end()) ? it->second: setting.getValueString());
@@ -96,9 +88,9 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
         res_columns[3]->insert(setting.isValueChanged());
         res_columns[4]->insert(setting.getDescription());
         res_columns[5]->insert(setting.getTypeName());
-        res_columns[6]->insert(setting.isObsolete());
-        res_columns[7]->insert((it != updated.end()) ? true : false);
-        res_columns[8]->insert(static_cast<Int8>(runtime_reload_it != settings.runtime_reload_map.end() ? runtime_reload_it->second: RuntimeReloadType::NO));
+        res_columns[6]->insert((it != updated.end()) ? true : false);
+        res_columns[7]->insert(static_cast<Int8>(settings.runtime_reload_map.contains(setting_name) ? settings.runtime_reload_map.at(setting_name): RuntimeReloadType::No));
+        res_columns[8]->insert(setting.isObsolete());
     }
 }
 

From e71f6893cc96d63c829e6f4f61178c8367dd0063 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 18:01:11 +0100
Subject: [PATCH 051/204] Add brief comment for MergeTreeSequentialSource

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 5075e43448a..ba50447be0f 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -20,7 +20,9 @@ namespace ErrorCodes
 }
 
 
-/// Lightweight (in terms of logic) stream for reading single part from MergeTree
+/// Lightweight (in terms of logic) stream for reading single part from
+/// MergeTree, used for merges and mutations.
+///
 /// NOTE:
 ///  It doesn't filter out rows that are deleted with lightweight deletes.
 ///  Use createMergeTreeSequentialSource filter out those rows.

From 79de5c16c92fc93a2a428aa12b6530e16ff2a7f9 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 17:58:54 +0100
Subject: [PATCH 052/204] Apply all reader settings for merges/mutations

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index ba50447be0f..1e406358277 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -144,10 +144,14 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
         columns_for_reader = data_part->getColumns().addTypes(columns_to_read);
     }
 
-    ReadSettings read_settings;
+    const auto & context = storage.getContext();
+    ReadSettings read_settings = context->getReadSettings();
+    read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
+    /// It does not make sense to use pthread_threadpool for background merges/mutations
+    /// And also to preserve backward compatibility
+    read_settings.local_fs_method = LocalFSReadMethod::pread;
     if (read_with_direct_io)
         read_settings.direct_io_threshold = 1;
-    read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
 
     MergeTreeReaderSettings reader_settings =
     {

From 6ed9b53d1f9f570b19b7b36b7d872ff10f3bca7d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 19:11:59 +0100
Subject: [PATCH 053/204] Refactor test_throttling slightly for upcoming tests

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .../{server_overrides.xml => dynamic_overrides.xml}       | 0
 .../configs/{server_backups.xml => static_overrides.xml}  | 0
 tests/integration/test_throttling/test.py                 | 8 ++++----
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename tests/integration/test_throttling/configs/{server_overrides.xml => dynamic_overrides.xml} (100%)
 rename tests/integration/test_throttling/configs/{server_backups.xml => static_overrides.xml} (100%)

diff --git a/tests/integration/test_throttling/configs/server_overrides.xml b/tests/integration/test_throttling/configs/dynamic_overrides.xml
similarity index 100%
rename from tests/integration/test_throttling/configs/server_overrides.xml
rename to tests/integration/test_throttling/configs/dynamic_overrides.xml
diff --git a/tests/integration/test_throttling/configs/server_backups.xml b/tests/integration/test_throttling/configs/static_overrides.xml
similarity index 100%
rename from tests/integration/test_throttling/configs/server_backups.xml
rename to tests/integration/test_throttling/configs/static_overrides.xml
diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py
index 04d02cc859d..31884fad88a 100644
--- a/tests/integration/test_throttling/test.py
+++ b/tests/integration/test_throttling/test.py
@@ -34,8 +34,8 @@ node = cluster.add_instance(
     "node",
     stay_alive=True,
     main_configs=[
-        "configs/server_backups.xml",
-        "configs/server_overrides.xml",
+        "configs/static_overrides.xml",
+        "configs/dynamic_overrides.xml",
         "configs/ssl.xml",
     ],
     user_configs=[
@@ -64,7 +64,7 @@ def revert_config():
         [
             "bash",
             "-c",
-            f"echo '<clickhouse></clickhouse>' > /etc/clickhouse-server/config.d/server_overrides.xml",
+            f"echo '<clickhouse></clickhouse>' > /etc/clickhouse-server/config.d/dynamic_overrides.xml",
         ]
     )
     node.exec_in_container(
@@ -96,7 +96,7 @@ def node_update_config(mode, setting, value=None):
     if mode is None:
         return
     if mode == "server":
-        config_path = "/etc/clickhouse-server/config.d/server_overrides.xml"
+        config_path = "/etc/clickhouse-server/config.d/dynamic_overrides.xml"
         config_content = f"""
         <clickhouse><{setting}>{value}</{setting}></clickhouse>
         """

From 837f4ea676665cda6383fb3b3e3ed04d8560ba76 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 18:30:11 +0100
Subject: [PATCH 054/204] Add ability to throttle merges/mutations

The main motivation is to be able to throttle background tasks, to
avoid affecting queries.

Two new server settings have been added for this:
- max_mutations_bandwidth_for_server
- max_merges_bandwidth_for_server

Note that they limit only reading, since usually you will not write
more data than you read, although this is sometimes possible in the
case of ALTER UPDATE.

But for now, to keep things simple, I decided to limit this with only
2 settings instead of 4.

Note that if write throttling is ever needed, it can reuse the same
settings and simply create a new throttler for writes.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Core/ServerSettings.h                     |  2 ++
 src/Interpreters/Context.cpp                  | 19 ++++++++++++
 src/Interpreters/Context.h                    |  3 ++
 src/Interpreters/MutationsInterpreter.cpp     |  1 +
 src/Storages/MergeTree/MergeTask.cpp          |  2 ++
 .../MergeTree/MergeTreeSequentialSource.cpp   | 24 +++++++++++++--
 .../MergeTree/MergeTreeSequentialSource.h     |  8 +++++
 .../configs/static_overrides.xml              |  3 ++
 tests/integration/test_throttling/test.py     | 29 +++++++++++++++++++
 9 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 85e3d33f80b..310b3585eab 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -26,6 +26,8 @@ namespace DB
     M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
     M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
     M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
+    M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \
+    M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \
     M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
     M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
     M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 589d03cc074..746c7706eb4 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -325,6 +325,9 @@ struct ContextSharedPart : boost::noncopyable
 
     mutable ThrottlerPtr backups_server_throttler;          /// A server-wide throttler for BACKUPs
 
+    mutable ThrottlerPtr mutations_throttler;               /// A server-wide throttler for mutations
+    mutable ThrottlerPtr merges_throttler;                  /// A server-wide throttler for merges
+
     MultiVersion<Macros> macros;                            /// Substitutions extracted from config.
     std::unique_ptr<DDLWorker> ddl_worker TSA_GUARDED_BY(mutex); /// Process ddl commands from zk.
     LoadTaskPtr ddl_worker_startup_task;                         /// To postpone `ddl_worker->startup()` after all tables startup
@@ -733,6 +736,12 @@ struct ContextSharedPart : boost::noncopyable
 
         if (auto bandwidth = server_settings.max_backup_bandwidth_for_server)
             backups_server_throttler = std::make_shared<Throttler>(bandwidth);
+
+        if (auto bandwidth = server_settings.max_mutations_bandwidth_for_server)
+            mutations_throttler = std::make_shared<Throttler>(bandwidth);
+
+        if (auto bandwidth = server_settings.max_merges_bandwidth_for_server)
+            merges_throttler = std::make_shared<Throttler>(bandwidth);
     }
 };
 
@@ -2994,6 +3003,16 @@ ThrottlerPtr Context::getBackupsThrottler() const
     return throttler;
 }
 
+ThrottlerPtr Context::getMutationsThrottler() const
+{
+    return shared->mutations_throttler;
+}
+
+ThrottlerPtr Context::getMergesThrottler() const
+{
+    return shared->merges_throttler;
+}
+
 bool Context::hasDistributedDDL() const
 {
     return getConfigRef().has("distributed_ddl");
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 39d2212ce80..a7ff7c270bc 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -1324,6 +1324,9 @@ public:
 
     ThrottlerPtr getBackupsThrottler() const;
 
+    ThrottlerPtr getMutationsThrottler() const;
+    ThrottlerPtr getMergesThrottler() const;
+
     /// Kitchen sink
     using ContextData::KitchenSink;
     using ContextData::kitchen_sink;
diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp
index bf50766c165..a6ea03f8a03 100644
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@@ -1280,6 +1280,7 @@ void MutationsInterpreter::Source::read(
         VirtualColumns virtual_columns(std::move(required_columns), part);
 
         createReadFromPartStep(
+            MergeTreeSequentialSourceType::Mutation,
             plan, *data, storage_snapshot, part,
             std::move(virtual_columns.columns_to_read),
             apply_deleted_mask_, filter, context_,
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index 8b5e9ba96ee..5592ffd57dc 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -566,6 +566,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
     for (size_t part_num = 0; part_num < global_ctx->future_part->parts.size(); ++part_num)
     {
         Pipe pipe = createMergeTreeSequentialSource(
+            MergeTreeSequentialSourceType::Merge,
             *global_ctx->data,
             global_ctx->storage_snapshot,
             global_ctx->future_part->parts[part_num],
@@ -920,6 +921,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
     for (const auto & part : global_ctx->future_part->parts)
     {
         Pipe pipe = createMergeTreeSequentialSource(
+            MergeTreeSequentialSourceType::Merge,
             *global_ctx->data,
             global_ctx->storage_snapshot,
             part,
diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 1e406358277..85dbbf87515 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -30,6 +30,7 @@ class MergeTreeSequentialSource : public ISource
 {
 public:
     MergeTreeSequentialSource(
+        MergeTreeSequentialSourceType type,
         const MergeTreeData & storage_,
         const StorageSnapshotPtr & storage_snapshot_,
         MergeTreeData::DataPartPtr data_part_,
@@ -85,6 +86,7 @@ private:
 
 
 MergeTreeSequentialSource::MergeTreeSequentialSource(
+    MergeTreeSequentialSourceType type,
     const MergeTreeData & storage_,
     const StorageSnapshotPtr & storage_snapshot_,
     MergeTreeData::DataPartPtr data_part_,
@@ -152,6 +154,17 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
     read_settings.local_fs_method = LocalFSReadMethod::pread;
     if (read_with_direct_io)
         read_settings.direct_io_threshold = 1;
+    /// Configure throttling
+    switch (type)
+    {
+        case Mutation:
+            read_settings.local_throttler = context->getMutationsThrottler();
+            break;
+        case Merge:
+            read_settings.local_throttler = context->getMergesThrottler();
+            break;
+    }
+    read_settings.remote_throttler = read_settings.local_throttler;
 
     MergeTreeReaderSettings reader_settings =
     {
@@ -244,6 +257,7 @@ MergeTreeSequentialSource::~MergeTreeSequentialSource() = default;
 
 
 Pipe createMergeTreeSequentialSource(
+    MergeTreeSequentialSourceType type,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
     MergeTreeData::DataPartPtr data_part,
@@ -264,7 +278,7 @@ Pipe createMergeTreeSequentialSource(
     if (need_to_filter_deleted_rows && !has_filter_column)
         columns_to_read.emplace_back(filter_column.name);
 
-    auto column_part_source = std::make_shared<MergeTreeSequentialSource>(
+    auto column_part_source = std::make_shared<MergeTreeSequentialSource>(type,
         storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges),
         /*apply_deleted_mask=*/ false, read_with_direct_io, take_column_types_from_storage, quiet);
 
@@ -292,6 +306,7 @@ class ReadFromPart final : public ISourceStep
 {
 public:
     ReadFromPart(
+        MergeTreeSequentialSourceType type_,
         const MergeTreeData & storage_,
         const StorageSnapshotPtr & storage_snapshot_,
         MergeTreeData::DataPartPtr data_part_,
@@ -301,6 +316,7 @@ public:
         ContextPtr context_,
         Poco::Logger * log_)
         : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)})
+        , type(type_)
         , storage(storage_)
         , storage_snapshot(storage_snapshot_)
         , data_part(std::move(data_part_))
@@ -337,7 +353,7 @@ public:
             }
         }
 
-        auto source = createMergeTreeSequentialSource(
+        auto source = createMergeTreeSequentialSource(type,
             storage,
             storage_snapshot,
             data_part,
@@ -353,6 +369,7 @@ public:
     }
 
 private:
+    MergeTreeSequentialSourceType type;
     const MergeTreeData & storage;
     StorageSnapshotPtr storage_snapshot;
     MergeTreeData::DataPartPtr data_part;
@@ -364,6 +381,7 @@ private:
 };
 
 void createReadFromPartStep(
+    MergeTreeSequentialSourceType type,
     QueryPlan & plan,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
@@ -374,7 +392,7 @@ void createReadFromPartStep(
     ContextPtr context,
     Poco::Logger * log)
 {
-    auto reading = std::make_unique<ReadFromPart>(
+    auto reading = std::make_unique<ReadFromPart>(type,
         storage, storage_snapshot, std::move(data_part),
         std::move(columns_to_read), apply_deleted_mask,
         filter, std::move(context), log);
diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h
index 396d3f76886..41def48aab6 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.h
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h
@@ -8,9 +8,16 @@
 namespace DB
 {
 
+enum MergeTreeSequentialSourceType /// Tag passed as the first argument of createMergeTreeSequentialSource() and createReadFromPartStep().
+{
+    Mutation, /// NOTE(review): presumably "the part is read on behalf of a mutation" - confirm against callers.
+    Merge, /// NOTE(review): presumably "the part is read on behalf of a merge" - confirm against callers.
+};
+
 /// Create stream for reading single part from MergeTree.
 /// If the part has lightweight delete mask then the deleted rows are filtered out.
 Pipe createMergeTreeSequentialSource(
+    MergeTreeSequentialSourceType type,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
     MergeTreeData::DataPartPtr data_part,
@@ -25,6 +32,7 @@ Pipe createMergeTreeSequentialSource(
 class QueryPlan;
 
 void createReadFromPartStep(
+    MergeTreeSequentialSourceType type,
     QueryPlan & plan,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
diff --git a/tests/integration/test_throttling/configs/static_overrides.xml b/tests/integration/test_throttling/configs/static_overrides.xml
index a8c43f8beaf..9f3bad2f882 100644
--- a/tests/integration/test_throttling/configs/static_overrides.xml
+++ b/tests/integration/test_throttling/configs/static_overrides.xml
@@ -31,4 +31,7 @@
         <allowed_disk>default</allowed_disk>
         <allowed_path>/backups/</allowed_path>
     </backups>
+
+    <max_mutations_bandwidth_for_server>1000000</max_mutations_bandwidth_for_server> <!-- 1M -->
+    <max_merges_bandwidth_for_server>1000000</max_merges_bandwidth_for_server> <!-- 1M -->
 </clickhouse>
diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py
index 31884fad88a..c53c2bb1ddf 100644
--- a/tests/integration/test_throttling/test.py
+++ b/tests/integration/test_throttling/test.py
@@ -430,3 +430,32 @@ def test_write_throttling(policy, mode, setting, value, should_took):
     )
     _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)")
     assert_took(took, should_took)
+
+
+def test_max_mutations_bandwidth_for_server():  # times an ALTER ... UPDATE (mutations_sync = 1) over 1e6 UInt64 keys stored with CODEC(NONE)
+    node.query(
+        """
+        drop table if exists data;
+        create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9;
+    """
+    )
+    node.query("insert into data select * from numbers(1e6)")
+    _, took = elapsed(
+        node.query,
+        "alter table data update key = -key where 1 settings mutations_sync = 1",
+    )
+    # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds
+    assert_took(took, 7)
+
+
+def test_max_merges_bandwidth_for_server():  # times OPTIMIZE ... FINAL over 1e6 UInt64 keys stored with CODEC(NONE)
+    node.query(
+        """
+        drop table if exists data;
+        create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9;
+    """
+    )
+    node.query("insert into data select * from numbers(1e6)")
+    _, took = elapsed(node.query, "optimize table data final")
+    # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds
+    assert_took(took, 7)

From 83f4b7defb0d8acd10fd25d8c1aff7704140801f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 26 Dec 2023 15:00:44 +0800
Subject: [PATCH 055/204] rebase master

---
 ...f_indexes_support_match_function.reference | 24 +++++++++----------
 ...ngrambf_indexes_support_match_function.sql |  2 ++
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
index 41ca02e3877..5c6a213a03f 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
@@ -2,10 +2,10 @@
 2	Hello World
 1	Hello ClickHouse
 2	Hello World
-          Granules: 6/6
-          Granules: 2/6
-          Granules: 6/6
-          Granules: 2/6
+            Granules: 6/6
+            Granules: 2/6
+            Granules: 6/6
+            Granules: 2/6
 ---
 1	Hello ClickHouse
 2	Hello World
@@ -13,14 +13,14 @@
 1	Hello ClickHouse
 2	Hello World
 6	World Champion
-          Granules: 6/6
-          Granules: 3/6
-          Granules: 6/6
-          Granules: 3/6
+            Granules: 6/6
+            Granules: 3/6
+            Granules: 6/6
+            Granules: 3/6
 ---
 5	OLAP Database
 5	OLAP Database
-          Granules: 6/6
-          Granules: 1/6
-          Granules: 6/6
-          Granules: 1/6
+            Granules: 6/6
+            Granules: 1/6
+            Granules: 6/6
+            Granules: 1/6
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
index 7378df41b8d..df39be8abd6 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
@@ -1,3 +1,4 @@
+SET allow_experimental_analyzer = 1;
 DROP TABLE IF EXISTS tokenbf_tab;
 DROP TABLE IF EXISTS ngrambf_tab;
 
@@ -85,6 +86,7 @@ SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
 -- Required string: 'OLAP'
 -- Alternatives: -
 
+set allow_experimental_analyzer = 1;
 SELECT *
 FROM
 (

From 547e3ed6c04a400222c7cab6205a1912c7d41760 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Tue, 26 Dec 2023 22:57:31 +0000
Subject: [PATCH 056/204] Add a check for the 'host_name' parameter

---
 src/Interpreters/DDLTask.cpp | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 0164f5668a2..d386ab9a91d 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -220,7 +220,20 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
     bool host_in_hostlist = false;
     std::exception_ptr first_exception = nullptr;
 
-    auto maybe_secure_port = global_context->getTCPPortSecure();
+    const auto maybe_secure_port = global_context->getTCPPortSecure();
+    const auto port = global_context->getTCPPort();
+
+    if (config_host_name) /// Fail early when the configured 'host_name' is not a local address on the secure or the plain TCP port.
+    {
+        bool is_local_port = (maybe_secure_port && HostID(*config_host_name, *maybe_secure_port).isLocalAddress(*maybe_secure_port)) ||
+                             HostID(*config_host_name, port).isLocalAddress(port);
+
+        if (!is_local_port)
+            throw Exception(
+                ErrorCodes::DNS_ERROR,
+                "{} is not a local adress. Check parameter 'host_name' in the configuration",
+                *config_host_name);
+    }
 
     for (const HostID & host : entry.hosts)
     {
@@ -229,7 +242,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
             if (config_host_name != host.host_name)
                 continue;
 
-            if (maybe_secure_port != host.port && global_context->getTCPPort() != host.port)
+            if (maybe_secure_port != host.port && port != host.port)
                 continue;
 
             host_in_hostlist = true;
@@ -242,7 +255,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
         {
             /// The port is considered local if it matches TCP or TCP secure port that the server is listening.
             bool is_local_port
-                = (maybe_secure_port && host.isLocalAddress(*maybe_secure_port)) || host.isLocalAddress(global_context->getTCPPort());
+                = (maybe_secure_port && host.isLocalAddress(*maybe_secure_port)) || host.isLocalAddress(port);
 
             if (!is_local_port)
                 continue;

From 3140f869cc05692d6c665b3525efb8b5cd8f0f16 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Wed, 27 Dec 2023 17:15:44 +0100
Subject: [PATCH 057/204] Fix typo

---
 src/Interpreters/DDLTask.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index d386ab9a91d..e7796c5d3a5 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -231,7 +231,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
         if (!is_local_port)
             throw Exception(
                 ErrorCodes::DNS_ERROR,
-                "{} is not a local adress. Check parameter 'host_name' in the configuration",
+                "{} is not a local address. Check parameter 'host_name' in the configuration",
                 *config_host_name);
     }
 

From 9ef8de21b22bf0bf364c67ea733c78ebe2c4b629 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 7 Dec 2023 20:39:49 +0000
Subject: [PATCH 058/204] Read column once while reading more than one
 subcolumn from it in Compact parts

---
 .../MergeTree/MergeTreeReaderCompact.cpp      | 42 ++++++++++++-------
 .../MergeTree/MergeTreeReaderCompact.h        |  2 +-
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
index 9713cc8b890..21b6a4f2aad 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
@@ -216,6 +216,10 @@ size_t MergeTreeReaderCompact::readRows(
     {
         size_t rows_to_read = data_part_info_for_read->getIndexGranularity().getMarkRows(from_mark);
 
+        /// If we need to read multiple subcolumns from a single column in storage,
+        /// we will read this column only once and then reuse it to extract all subcolumns.
+        std::unordered_map<String, ColumnPtr> columns_cache_for_subcolumns;
+
         for (size_t pos = 0; pos < num_columns; ++pos)
         {
             if (!res_columns[pos])
@@ -226,7 +230,7 @@ size_t MergeTreeReaderCompact::readRows(
                 auto & column = res_columns[pos];
                 size_t column_size_before_reading = column->size();
 
-                readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, columns_for_offsets[pos]);
+                readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, columns_for_offsets[pos], columns_cache_for_subcolumns);
 
                 size_t read_rows_in_column = column->size() - column_size_before_reading;
                 if (read_rows_in_column != rows_to_read)
@@ -265,7 +269,7 @@ size_t MergeTreeReaderCompact::readRows(
 void MergeTreeReaderCompact::readData(
     const NameAndTypePair & name_and_type, ColumnPtr & column,
     size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read,
-    ColumnNameLevel name_level_for_offsets)
+    ColumnNameLevel name_level_for_offsets, std::unordered_map<String, ColumnPtr> & columns_cache_for_subcolumns)
 {
     const auto & [name, type] = name_and_type;
     std::optional<NameAndTypePair> column_for_offsets;
@@ -331,22 +335,32 @@ void MergeTreeReaderCompact::readData(
     if (name_and_type.isSubcolumn())
     {
         NameAndTypePair name_type_in_storage{name_and_type.getNameInStorage(), name_and_type.getTypeInStorage()};
+        ColumnPtr temp_column;
 
-        /// In case of reading onlys offset use the correct serialization for reading of the prefix
-        auto serialization = getSerializationInPart(name_type_in_storage);
-        ColumnPtr temp_column = name_type_in_storage.type->createColumn(*serialization);
-
-        if (column_for_offsets)
+        auto it = columns_cache_for_subcolumns.find(name_type_in_storage.name);
+        if (it != columns_cache_for_subcolumns.end())
         {
-            auto serialization_for_prefix = getSerializationInPart(*column_for_offsets);
-
-            deserialize_settings.getter = buffer_getter_for_prefix;
-            serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix);
+            temp_column = it->second;
         }
+        else
+        {
+            /// In case of reading only offsets use the correct serialization for reading of the prefix
+            auto serialization = getSerializationInPart(name_type_in_storage);
+            temp_column = name_type_in_storage.type->createColumn(*serialization);
 
-        deserialize_settings.getter = buffer_getter;
-        serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
-        serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr);
+            if (column_for_offsets)
+            {
+                auto serialization_for_prefix = getSerializationInPart(*column_for_offsets);
+
+                deserialize_settings.getter = buffer_getter_for_prefix;
+                serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix);
+            }
+
+            deserialize_settings.getter = buffer_getter;
+            serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
+            serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr);
+            columns_cache_for_subcolumns[name_type_in_storage.name] = temp_column;
+        }
 
         auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column);
 
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h
index cf706526363..dace4ec468e 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.h
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h
@@ -76,7 +76,7 @@ private:
 
     void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark,
         size_t current_task_last_mark, size_t column_position,
-        size_t rows_to_read, ColumnNameLevel name_level_for_offsets);
+        size_t rows_to_read, ColumnNameLevel name_level_for_offsets, std::unordered_map<String, ColumnPtr> & columns_cache_for_subcolumns);
 
     /// Returns maximal value of granule size in compressed file from @mark_ranges.
     /// This value is used as size of read buffer.

From 3ec1b2a852662353ec3b5894dfd83219ac164181 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 27 Dec 2023 16:32:21 +0000
Subject: [PATCH 059/204] Refactor StorageMerge.

---
 src/Storages/StorageMerge.cpp | 283 +++++++++++++++++-----------------
 src/Storages/StorageMerge.h   | 119 ++++++++++----
 2 files changed, 232 insertions(+), 170 deletions(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 36c92129177..b2855dfc61c 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -88,6 +88,20 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
+StorageMerge::DatabaseNameOrRegexp::DatabaseNameOrRegexp( /// Member-wise constructor: each argument initializes the member of the same name (without the trailing underscore).
+    const String & source_database_name_or_regexp_,
+    bool database_is_regexp_,
+    std::optional<OptimizedRegularExpression> source_database_regexp_, /// Taken by value and moved into the member below.
+    std::optional<OptimizedRegularExpression> source_table_regexp_, /// Taken by value and moved into the member below.
+    const DBToTableSetMap & source_databases_and_tables_)
+    : source_database_name_or_regexp(source_database_name_or_regexp_)
+    , database_is_regexp(database_is_regexp_)
+    , source_database_regexp(std::move(source_database_regexp_))
+    , source_table_regexp(std::move(source_table_regexp_))
+    , source_databases_and_tables(source_databases_and_tables_)
+{
+}
+
 StorageMerge::StorageMerge(
     const StorageID & table_id_,
     const ColumnsDescription & columns_,
@@ -98,10 +112,11 @@ StorageMerge::StorageMerge(
     ContextPtr context_)
     : IStorage(table_id_)
     , WithContext(context_->getGlobalContext())
-    , source_database_regexp(source_database_name_or_regexp_)
-    , source_databases_and_tables(source_databases_and_tables_)
-    , source_database_name_or_regexp(source_database_name_or_regexp_)
-    , database_is_regexp(database_is_regexp_)
+    , database_name_or_regexp(
+        source_database_name_or_regexp_,
+        database_is_regexp_,
+        source_database_name_or_regexp_, {},
+        source_databases_and_tables_)
 {
     StorageInMemoryMetadata storage_metadata;
     storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_);
@@ -119,10 +134,11 @@ StorageMerge::StorageMerge(
     ContextPtr context_)
     : IStorage(table_id_)
     , WithContext(context_->getGlobalContext())
-    , source_database_regexp(source_database_name_or_regexp_)
-    , source_table_regexp(source_table_regexp_)
-    , source_database_name_or_regexp(source_database_name_or_regexp_)
-    , database_is_regexp(database_is_regexp_)
+    , database_name_or_regexp(
+        source_database_name_or_regexp_,
+        database_is_regexp_,
+        source_database_name_or_regexp_,
+        source_table_regexp_, {})
 {
     StorageInMemoryMetadata storage_metadata;
     storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_);
@@ -130,6 +146,11 @@ StorageMerge::StorageMerge(
     setInMemoryMetadata(storage_metadata);
 }
 
+StorageMerge::DatabaseTablesIterators StorageMerge::getDatabaseIterators(ContextPtr context_) const
+{
+    return database_name_or_regexp.getDatabaseIterators(context_); /// Forwards to the database_name_or_regexp member; no logic of its own.
+}
+
 ColumnsDescription StorageMerge::getColumnsDescriptionFromSourceTables() const
 {
     auto table = getFirstTable([](auto && t) { return t; });
@@ -141,7 +162,7 @@ ColumnsDescription StorageMerge::getColumnsDescriptionFromSourceTables() const
 template <typename F>
 StoragePtr StorageMerge::getFirstTable(F && predicate) const
 {
-    auto database_table_iterators = getDatabaseIterators(getContext());
+    auto database_table_iterators = database_name_or_regexp.getDatabaseIterators(getContext());
 
     for (auto & iterator : database_table_iterators)
     {
@@ -236,27 +257,27 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
     return supported_columns;
 }
 
-bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const
-{
-    /// It's beneficial if it is true for at least one table.
-    StorageListWithLocks selected_tables = getSelectedTables(query_context);
+// bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const
+// {
+//     /// It's beneficial if it is true for at least one table.
+//     StorageListWithLocks selected_tables = getSelectedTables(query_context);
 
-    size_t i = 0;
-    for (const auto & table : selected_tables)
-    {
-        const auto & storage_ptr = std::get<1>(table);
-        auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
-        if (storage_ptr->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot))
-            return true;
+//     size_t i = 0;
+//     for (const auto & table : selected_tables)
+//     {
+//         const auto & storage_ptr = std::get<1>(table);
+//         auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
+//         if (storage_ptr->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot))
+//             return true;
 
-        ++i;
-        /// For simplicity reasons, check only first ten tables.
-        if (i > 10)
-            break;
-    }
+//         ++i;
+//         /// For simplicity reasons, check only first ten tables.
+//         if (i > 10)
+//             break;
+//     }
 
-    return false;
-}
+//     return false;
+// }
 
 
 QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(
@@ -277,7 +298,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(
 
     auto stage_in_source_tables = QueryProcessingStage::FetchColumns;
 
-    DatabaseTablesIterators database_table_iterators = getDatabaseIterators(local_context);
+    DatabaseTablesIterators database_table_iterators = database_name_or_regexp.getDatabaseIterators(local_context);
 
     size_t selected_table_size = 0;
 
@@ -320,43 +341,26 @@ void StorageMerge::read(
     auto modified_context = Context::createCopy(local_context);
     modified_context->setSetting("optimize_move_to_prewhere", false);
 
-    bool has_database_virtual_column = false;
-    bool has_table_virtual_column = false;
-    Names real_column_names;
-    real_column_names.reserve(column_names.size());
 
-    for (const auto & column_name : column_names)
-    {
-        if (column_name == "_database" && isVirtualColumn(column_name, storage_snapshot->metadata))
-            has_database_virtual_column = true;
-        else if (column_name == "_table" && isVirtualColumn(column_name, storage_snapshot->metadata))
-            has_table_virtual_column = true;
-        else
-            real_column_names.push_back(column_name);
-    }
+    // InputOrderInfoPtr input_sorting_info;
+    // if (query_info.order_optimizer)
+    // {
+    //     for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
+    //     {
+    //         auto storage_ptr = std::get<1>(*it);
+    //         auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
+    //         auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot, modified_context);
+    //         if (it == selected_tables.begin())
+    //             input_sorting_info = current_info;
+    //         else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))
+    //             input_sorting_info.reset();
 
-    StorageListWithLocks selected_tables
-        = getSelectedTables(modified_context, query_info.query, has_database_virtual_column, has_table_virtual_column);
+    //         if (!input_sorting_info)
+    //             break;
+    //     }
 
-    InputOrderInfoPtr input_sorting_info;
-    if (query_info.order_optimizer)
-    {
-        for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
-        {
-            auto storage_ptr = std::get<1>(*it);
-            auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
-            auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot, modified_context);
-            if (it == selected_tables.begin())
-                input_sorting_info = current_info;
-            else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))
-                input_sorting_info.reset();
-
-            if (!input_sorting_info)
-                break;
-        }
-
-        query_info.input_order_info = input_sorting_info;
-    }
+    //     query_info.input_order_info = input_sorting_info;
+    // }
 
     query_plan.addInterpreterContext(modified_context);
 
@@ -365,10 +369,11 @@ void StorageMerge::read(
 
     auto step = std::make_unique<ReadFromMerge>(
         common_header,
-        std::move(selected_tables),
-        real_column_names,
-        has_database_virtual_column,
-        has_table_virtual_column,
+        //std::move(selected_tables),
+        //real_column_names,
+        column_names,
+        // has_database_virtual_column,
+        // has_table_virtual_column,
         max_block_size,
         num_streams,
         shared_from_this(),
@@ -380,43 +385,12 @@ void StorageMerge::read(
     query_plan.addStep(std::move(step));
 }
 
-/// An object of this helper class is created
-///  when processing a Merge table data source (subordinary table)
-///  that has row policies
-///  to guarantee that these row policies are applied
-class ReadFromMerge::RowPolicyData
-{
-public:
-    RowPolicyData(RowPolicyFilterPtr, std::shared_ptr<DB::IStorage>, ContextPtr);
-
-    /// Add to data stream columns that are needed only for row policies
-    ///  SELECT x from T  if  T has row policy  y=42
-    ///  required y in data pipeline
-    void extendNames(Names &) const;
-
-    /// Use storage facilities to filter data
-    ///  optimization
-    ///  does not guarantee accuracy, but reduces number of rows
-    void addStorageFilter(SourceStepWithFilter *) const;
-
-    /// Create explicit filter transform to exclude
-    /// rows that are not conform to row level policy
-    void addFilterTransform(QueryPipelineBuilder &) const;
-
-private:
-    std::string filter_column_name; // complex filter, may contain logic operations
-    ActionsDAGPtr actions_dag;
-    ExpressionActionsPtr filter_actions;
-    StorageMetadataPtr storage_metadata_snapshot;
-};
-
-
 ReadFromMerge::ReadFromMerge(
     Block common_header_,
-    StorageListWithLocks selected_tables_,
-    Names column_names_,
-    bool has_database_virtual_column_,
-    bool has_table_virtual_column_,
+    //StorageListWithLocks selected_tables_,
+    Names all_column_names_,
+    // bool has_database_virtual_column_,
+    // bool has_table_virtual_column_,
     size_t max_block_size,
     size_t num_streams,
     StoragePtr storage,
@@ -428,21 +402,22 @@ ReadFromMerge::ReadFromMerge(
     , required_max_block_size(max_block_size)
     , requested_num_streams(num_streams)
     , common_header(std::move(common_header_))
-    , selected_tables(std::move(selected_tables_))
-    , column_names(std::move(column_names_))
-    , has_database_virtual_column(has_database_virtual_column_)
-    , has_table_virtual_column(has_table_virtual_column_)
+    //, selected_tables(std::move(selected_tables_))
+    , all_column_names(std::move(all_column_names_))
+    // , has_database_virtual_column(has_database_virtual_column_)
+    // , has_table_virtual_column(has_table_virtual_column_)
     , storage_merge(std::move(storage))
     , merge_storage_snapshot(std::move(storage_snapshot))
     , query_info(query_info_)
     , context(std::move(context_))
     , common_processed_stage(processed_stage)
 {
-    createChildPlans();
 }
 
 void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
+    filterTablesAndCreateChildPlans();
+
     if (selected_tables.empty())
     {
         pipeline.init(Pipe(std::make_shared<NullSource>(output_stream->header)));
@@ -452,13 +427,10 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
     QueryPlanResourceHolder resources;
     std::vector<std::unique_ptr<QueryPipelineBuilder>> pipelines;
 
-    chassert(selected_tables.size() == child_plans.size());
-    chassert(selected_tables.size() == table_aliases.size());
-    chassert(selected_tables.size() == table_row_policy_data_opts.size());
     auto table_it = selected_tables.begin();
     for (size_t i = 0; i < selected_tables.size(); ++i, ++table_it)
     {
-        auto & plan = child_plans.at(i);
+        auto & child_plan = child_plans->at(i);
         const auto & table = *table_it;
 
         const auto storage = std::get<1>(table);
@@ -468,13 +440,13 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
         auto modified_query_info = getModifiedQueryInfo(query_info, context, table, nested_storage_snaphsot);
 
         auto source_pipeline = createSources(
-            plan,
+            child_plan.plan,
             nested_storage_snaphsot,
             modified_query_info,
             common_processed_stage,
             common_header,
-            table_aliases.at(i),
-            table_row_policy_data_opts.at(i),
+            child_plan.table_aliases,
+            child_plan.row_policy_data_opt,
             table,
             context);
 
@@ -512,10 +484,37 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
     pipeline.addResources(std::move(resources));
 }
 
-void ReadFromMerge::createChildPlans()
+void ReadFromMerge::filterTablesAndCreateChildPlans()
+{ /// Splits all_column_names into real columns and the _database/_table flags, selects the tables, then fills child_plans.
+    if (child_plans) /// Already built by an earlier call - nothing to do.
+        return;
+
+    has_database_virtual_column = false;
+    has_table_virtual_column = false;
+    column_names.clear();
+    column_names.reserve(all_column_names.size()); /// Upper bound: at most every requested name ends up in column_names.
+
+    for (const auto & column_name : all_column_names)
+    {
+        if (column_name == "_database" && storage_merge->isVirtualColumn(column_name, merge_storage_snapshot->metadata))
+            has_database_virtual_column = true;
+        else if (column_name == "_table" && storage_merge->isVirtualColumn(column_name, merge_storage_snapshot->metadata))
+            has_table_virtual_column = true;
+        else
+            column_names.push_back(column_name);
+    }
+
+    selected_tables = getSelectedTables(context, query_info.query, has_database_virtual_column, has_table_virtual_column);
+
+    child_plans = createChildPlans(query_info);
+}
+
+std::vector<ReadFromMerge::ChildPlan> ReadFromMerge::createChildPlans(SelectQueryInfo & query_info_) const
 {
     if (selected_tables.empty())
-        return;
+        return {};
+
+    std::vector<ChildPlan> res;
 
     size_t tables_count = selected_tables.size();
     Float64 num_streams_multiplier
@@ -525,7 +524,7 @@ void ReadFromMerge::createChildPlans()
 
     if (order_info)
     {
-        query_info.input_order_info = order_info;
+        query_info_.input_order_info = order_info;
     }
     else if (query_info.order_optimizer)
     {
@@ -544,7 +543,7 @@ void ReadFromMerge::createChildPlans()
                 break;
         }
 
-        query_info.input_order_info = input_sorting_info;
+        query_info_.input_order_info = input_sorting_info;
     }
 
     for (const auto & table : selected_tables)
@@ -564,8 +563,10 @@ void ReadFromMerge::createChildPlans()
         if (sampling_requested && !storage->supportsSampling())
             throw Exception(ErrorCodes::SAMPLING_NOT_SUPPORTED, "Illegal SAMPLE: table {} doesn't support sampling", storage->getStorageID().getNameForLogs());
 
-        auto & aliases = table_aliases.emplace_back();
-        auto & row_policy_data_opt = table_row_policy_data_opts.emplace_back();
+        res.emplace_back();
+
+        auto & aliases = res.back().table_aliases;
+        auto & row_policy_data_opt = res.back().row_policy_data_opt;
         auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr();
         auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, context);
 
@@ -638,7 +639,7 @@ void ReadFromMerge::createChildPlans()
             }
         }
 
-        child_plans.emplace_back(createPlanForTable(
+        res.back().plan = createPlanForTable(
             nested_storage_snaphsot,
             modified_query_info,
             common_processed_stage,
@@ -647,8 +648,10 @@ void ReadFromMerge::createChildPlans()
             column_names_as_aliases.empty() ? std::move(real_column_names) : std::move(column_names_as_aliases),
             row_policy_data_opt,
             context,
-            current_streams));
+            current_streams);
     }
+
+    return res;
 }
 
 SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & query_info,
@@ -826,7 +829,7 @@ QueryPlan ReadFromMerge::createPlanForTable(
     Names && real_column_names,
     const RowPolicyDataOpt & row_policy_data_opt,
     ContextMutablePtr modified_context,
-    size_t streams_num)
+    size_t streams_num) const
 {
     const auto & [database_name, storage, _, table_name] = storage_with_lock;
 
@@ -989,7 +992,7 @@ void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & bui
     });
 }
 
-StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
+StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables(
     ContextPtr query_context,
     const ASTPtr & query /* = nullptr */,
     bool filter_by_database_virtual_column /* = false */,
@@ -1002,8 +1005,8 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
     assert(!filter_by_database_virtual_column || !filter_by_table_virtual_column || query);
 
     const Settings & settings = query_context->getSettingsRef();
-    StorageListWithLocks selected_tables;
-    DatabaseTablesIterators database_table_iterators = getDatabaseIterators(getContext());
+    StorageListWithLocks res;
+    DatabaseTablesIterators database_table_iterators = assert_cast<StorageMerge &>(*storage_merge).getDatabaseIterators(query_context);
 
     MutableColumnPtr database_name_virtual_column;
     MutableColumnPtr table_name_virtual_column;
@@ -1030,10 +1033,10 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
             if (query && query->as<ASTSelectQuery>()->prewhere() && !storage->supportsPrewhere())
                 throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Storage {} doesn't support PREWHERE.", storage->getName());
 
-            if (storage.get() != this)
+            if (storage.get() != storage_merge.get())
             {
                 auto table_lock = storage->lockForShare(query_context->getCurrentQueryId(), settings.lock_acquire_timeout);
-                selected_tables.emplace_back(iterator->databaseName(), storage, std::move(table_lock), iterator->name());
+                res.emplace_back(iterator->databaseName(), storage, std::move(table_lock), iterator->name());
                 if (filter_by_table_virtual_column)
                     table_name_virtual_column->insert(iterator->name());
             }
@@ -1051,7 +1054,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
         auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_database");
 
         /// Remove unused databases from the list
-        selected_tables.remove_if([&](const auto & elem) { return values.find(std::get<0>(elem)) == values.end(); });
+        res.remove_if([&](const auto & elem) { return values.find(std::get<0>(elem)) == values.end(); });
     }
 
     if (filter_by_table_virtual_column)
@@ -1062,13 +1065,13 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(
         auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_table");
 
         /// Remove unused tables from the list
-        selected_tables.remove_if([&](const auto & elem) { return values.find(std::get<3>(elem)) == values.end(); });
+        res.remove_if([&](const auto & elem) { return values.find(std::get<3>(elem)) == values.end(); });
     }
 
-    return selected_tables;
+    return res;
 }
 
-DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const String & database_name, ContextPtr local_context) const
+DatabaseTablesIteratorPtr StorageMerge::DatabaseNameOrRegexp::getDatabaseIterator(const String & database_name, ContextPtr local_context) const
 {
     auto database = DatabaseCatalog::instance().getDatabase(database_name);
 
@@ -1088,7 +1091,7 @@ DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const String & datab
     return database->getTablesIterator(local_context, table_name_match);
 }
 
-StorageMerge::DatabaseTablesIterators StorageMerge::getDatabaseIterators(ContextPtr local_context) const
+StorageMerge::DatabaseTablesIterators StorageMerge::DatabaseNameOrRegexp::getDatabaseIterators(ContextPtr local_context) const
 {
     try
     {
@@ -1215,6 +1218,8 @@ void ReadFromMerge::convertAndFilterSourceStream(
 
 bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_)
 {
+    filterTablesAndCreateChildPlans();
+
     /// Disable read-in-order optimization for reverse order with final.
     /// Otherwise, it can lead to incorrect final behavior because the implementation may rely on the reading in direct order).
     if (order_info_->direction != 1 && InterpreterSelectQuery::isQueryWithFinal(query_info))
@@ -1227,9 +1232,9 @@ bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_)
     };
 
     bool ok = true;
-    for (const auto & plan : child_plans)
-        if (plan.isInitialized())
-            ok &= recursivelyApplyToReadingSteps(plan.getRootNode(), request_read_in_order);
+    for (const auto & child_plan : *child_plans)
+        if (child_plan.plan.isInitialized())
+            ok &= recursivelyApplyToReadingSteps(child_plan.plan.getRootNode(), request_read_in_order);
 
     if (!ok)
         return false;
@@ -1256,9 +1261,11 @@ void ReadFromMerge::applyFilters(const QueryPlan & plan) const
 
 void ReadFromMerge::applyFilters()
 {
-    for (const auto & plan : child_plans)
-        if (plan.isInitialized())
-            applyFilters(plan);
+    filterTablesAndCreateChildPlans();
+
+    for (const auto & child_plan : *child_plans)
+        if (child_plan.plan.isInitialized())
+            applyFilters(child_plan.plan);
 }
 
 IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index 2455eb678bb..fe55034bc49 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -12,6 +12,9 @@ namespace DB
 
 struct QueryPlanResourceHolder;
 
+struct RowPolicyFilter;
+using RowPolicyFilterPtr = std::shared_ptr<const RowPolicyFilter>;
+
 /** A table that represents the union of an arbitrary number of other tables.
   * All tables must have the same structure.
   */
@@ -71,8 +74,8 @@ public:
     /// the structure of sub-tables is not checked
     void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
 
-    bool mayBenefitFromIndexForIn(
-        const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override;
+    // bool mayBenefitFromIndexForIn(
+    //     const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override;
 
     /// Evaluate database name or regexp for StorageMerge and TableFunction merge
     static std::tuple<bool /* is_regexp */, ASTPtr> evaluateDatabaseName(const ASTPtr & node, ContextPtr context);
@@ -82,24 +85,36 @@ public:
     std::optional<UInt64> totalRows(const Settings & settings) const override;
     std::optional<UInt64> totalBytes(const Settings & settings) const override;
 
+    using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
+    DatabaseTablesIterators getDatabaseIterators(ContextPtr context) const;
+
 private:
-    std::optional<OptimizedRegularExpression> source_database_regexp;
-    std::optional<OptimizedRegularExpression> source_table_regexp;
-    std::optional<DBToTableSetMap> source_databases_and_tables;
-
-    String source_database_name_or_regexp;
-    bool database_is_regexp = false;
-
     /// (Database, Table, Lock, TableName)
     using StorageWithLockAndName = std::tuple<String, StoragePtr, TableLockHolder, String>;
     using StorageListWithLocks = std::list<StorageWithLockAndName>;
-    using DatabaseTablesIterators = std::vector<DatabaseTablesIteratorPtr>;
 
-    StorageMerge::StorageListWithLocks getSelectedTables(
-        ContextPtr query_context,
-        const ASTPtr & query = nullptr,
-        bool filter_by_database_virtual_column = false,
-        bool filter_by_table_virtual_column = false) const;
+    struct DatabaseNameOrRegexp
+    {
+        String source_database_name_or_regexp;
+        bool database_is_regexp = false;
+
+        std::optional<OptimizedRegularExpression> source_database_regexp;
+        std::optional<OptimizedRegularExpression> source_table_regexp;
+        std::optional<DBToTableSetMap> source_databases_and_tables;
+
+        DatabaseNameOrRegexp(
+            const String & source_database_name_or_regexp_,
+            bool database_is_regexp_,
+            std::optional<OptimizedRegularExpression> source_database_regexp_,
+            std::optional<OptimizedRegularExpression> source_table_regexp_,
+            const DBToTableSetMap & source_databases_and_tables_);
+
+        DatabaseTablesIteratorPtr getDatabaseIterator(const String & database_name, ContextPtr context) const;
+
+        DatabaseTablesIterators getDatabaseIterators(ContextPtr context) const;
+    };
+
+    DatabaseNameOrRegexp database_name_or_regexp;
 
     template <typename F>
     StoragePtr getFirstTable(F && predicate) const;
@@ -107,10 +122,6 @@ private:
     template <typename F>
     void forEachTable(F && func) const;
 
-    DatabaseTablesIteratorPtr getDatabaseIterator(const String & database_name, ContextPtr context) const;
-
-    DatabaseTablesIterators getDatabaseIterators(ContextPtr context) const;
-
     NamesAndTypesList getVirtuals() const override;
     ColumnSizeByName getColumnSizes() const override;
 
@@ -136,10 +147,9 @@ public:
 
     ReadFromMerge(
         Block common_header_,
-        StorageListWithLocks selected_tables_,
-        Names column_names_,
-        bool has_database_virtual_column_,
-        bool has_table_virtual_column_,
+        Names all_column_names_,
+        // bool has_database_virtual_column_,
+        // bool has_table_virtual_column_,
         size_t max_block_size,
         size_t num_streams,
         StoragePtr storage,
@@ -163,16 +173,13 @@ private:
     const Block common_header;
 
     StorageListWithLocks selected_tables;
+    Names all_column_names;
     Names column_names;
     bool has_database_virtual_column;
     bool has_table_virtual_column;
     StoragePtr storage_merge;
     StorageSnapshotPtr merge_storage_snapshot;
 
-    /// Store read plan for each child table.
-    /// It's needed to guarantee lifetime for child steps to be the same as for this step (mainly for EXPLAIN PIPELINE).
-    std::vector<QueryPlan> child_plans;
-
     SelectQueryInfo query_info;
     ContextMutablePtr context;
     QueryProcessingStage::Enum common_processed_stage;
@@ -188,14 +195,56 @@ private:
 
     using Aliases = std::vector<AliasData>;
 
-    class RowPolicyData;
+    /// An object of this helper class is created
+    ///  when processing a Merge table data source (subordinate table)
+    ///  that has row policies,
+    ///  to guarantee that these row policies are applied.
+    class RowPolicyData
+    {
+    public:
+        RowPolicyData(RowPolicyFilterPtr, std::shared_ptr<DB::IStorage>, ContextPtr);
+
+        /// Add to the data stream the columns that are needed only for row policies.
+        ///  E.g. for `SELECT x FROM T`, if T has the row policy `y = 42`,
+        ///  column y is required in the data pipeline.
+        void extendNames(Names &) const;
+
+        /// Use storage facilities to filter data.
+        ///  This is an optimization:
+        ///  it does not guarantee accuracy, but reduces the number of rows.
+        void addStorageFilter(SourceStepWithFilter *) const;
+
+        /// Create an explicit filter transform to exclude
+        /// rows that do not conform to the row-level policy.
+        void addFilterTransform(QueryPipelineBuilder &) const;
+
+    private:
+        std::string filter_column_name; // complex filter, may contain logical operations
+        ActionsDAGPtr actions_dag;
+        ExpressionActionsPtr filter_actions;
+        StorageMetadataPtr storage_metadata_snapshot;
+    };
+
     using RowPolicyDataOpt = std::optional<RowPolicyData>;
 
-    std::vector<Aliases> table_aliases;
+    // std::vector<Aliases> table_aliases;
 
-    std::vector<RowPolicyDataOpt> table_row_policy_data_opts;
+    // std::vector<RowPolicyDataOpt> table_row_policy_data_opts;
 
-    void createChildPlans();
+    struct ChildPlan
+    {
+        QueryPlan plan;
+        Aliases table_aliases;
+        RowPolicyDataOpt row_policy_data_opt;
+    };
+
+    /// Store read plan for each child table.
+    /// It's needed to guarantee lifetime for child steps to be the same as for this step (mainly for EXPLAIN PIPELINE).
+    std::optional<std::vector<ChildPlan>> child_plans;
+
+    std::vector<ChildPlan> createChildPlans(SelectQueryInfo & query_info_) const;
+
+    void filterTablesAndCreateChildPlans();
 
     void applyFilters(const QueryPlan & plan) const;
 
@@ -208,7 +257,7 @@ private:
         Names && real_column_names,
         const RowPolicyDataOpt & row_policy_data_opt,
         ContextMutablePtr modified_context,
-        size_t streams_num);
+        size_t streams_num) const;
 
     QueryPipelineBuilderPtr createSources(
         QueryPlan & plan,
@@ -235,6 +284,12 @@ private:
         ContextPtr context,
         QueryPipelineBuilder & builder,
         QueryProcessingStage::Enum processed_stage);
+
+    StorageMerge::StorageListWithLocks getSelectedTables(
+        ContextPtr query_context,
+        const ASTPtr & query = nullptr,
+        bool filter_by_database_virtual_column = false,
+        bool filter_by_table_virtual_column = false) const;
 };
 
 }

From 2f50d3da5060a3aa3ee66154db822512c047e7e6 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 27 Dec 2023 17:05:23 +0000
Subject: [PATCH 060/204] Filter virtual columns for StorageMerge from plan
 filter condition.

---
 src/Storages/StorageMerge.cpp | 29 ++++++++++++++---------------
 src/Storages/StorageMerge.h   |  5 ++---
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index b2855dfc61c..2798965f924 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -504,7 +504,7 @@ void ReadFromMerge::filterTablesAndCreateChildPlans()
             column_names.push_back(column_name);
     }
 
-    selected_tables = getSelectedTables(context, query_info.query, has_database_virtual_column, has_table_virtual_column);
+    selected_tables = getSelectedTables(context, has_database_virtual_column, has_table_virtual_column);
 
     child_plans = createChildPlans(query_info);
 }
@@ -994,16 +994,9 @@ void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & bui
 
 StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables(
     ContextPtr query_context,
-    const ASTPtr & query /* = nullptr */,
-    bool filter_by_database_virtual_column /* = false */,
-    bool filter_by_table_virtual_column /* = false */) const
+    bool filter_by_database_virtual_column,
+    bool filter_by_table_virtual_column) const
 {
-    /// FIXME: filtering does not work with allow_experimental_analyzer due to
-    /// different column names there (it has "table_name._table" not just
-    /// "_table")
-
-    assert(!filter_by_database_virtual_column || !filter_by_table_virtual_column || query);
-
     const Settings & settings = query_context->getSettingsRef();
     StorageListWithLocks res;
     DatabaseTablesIterators database_table_iterators = assert_cast<StorageMerge &>(*storage_merge).getDatabaseIterators(query_context);
@@ -1030,9 +1023,6 @@ StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables(
             if (!storage)
                 continue;
 
-            if (query && query->as<ASTSelectQuery>()->prewhere() && !storage->supportsPrewhere())
-                throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Storage {} doesn't support PREWHERE.", storage->getName());
-
             if (storage.get() != storage_merge.get())
             {
                 auto table_lock = storage->lockForShare(query_context->getCurrentQueryId(), settings.lock_acquire_timeout);
@@ -1045,12 +1035,21 @@ StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables(
         }
     }
 
+    if (!filter_by_database_virtual_column && !filter_by_table_virtual_column)
+        return res;
+
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    if (!filter_actions_dag)
+        return res;
+
+    const auto * predicate = filter_actions_dag->getOutputs().at(0);
+
     if (filter_by_database_virtual_column)
     {
         /// Filter names of selected tables if there is a condition on "_database" virtual column in WHERE clause
         Block virtual_columns_block
             = Block{ColumnWithTypeAndName(std::move(database_name_virtual_column), std::make_shared<DataTypeString>(), "_database")};
-        VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, query_context);
+        VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context);
         auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_database");
 
         /// Remove unused databases from the list
@@ -1061,7 +1060,7 @@ StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables(
     {
         /// Filter names of selected tables if there is a condition on "_table" virtual column in WHERE clause
         Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(table_name_virtual_column), std::make_shared<DataTypeString>(), "_table")};
-        VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, query_context);
+        VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context);
         auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_table");
 
         /// Remove unused tables from the list
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index fe55034bc49..a6cb89f0a42 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -287,9 +287,8 @@ private:
 
     StorageMerge::StorageListWithLocks getSelectedTables(
         ContextPtr query_context,
-        const ASTPtr & query = nullptr,
-        bool filter_by_database_virtual_column = false,
-        bool filter_by_table_virtual_column = false) const;
+        bool filter_by_database_virtual_column,
+        bool filter_by_table_virtual_column) const;
 };
 
 }

From 9f9b080b008a3dec2af9e604e895dc5339b50893 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 27 Dec 2023 18:33:00 +0100
Subject: [PATCH 061/204] Update StorageMerge.cpp

---
 src/Storages/StorageMerge.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 2798965f924..88fb203f658 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -80,7 +80,6 @@ namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
     extern const int NOT_IMPLEMENTED;
-    extern const int ILLEGAL_PREWHERE;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
     extern const int SAMPLING_NOT_SUPPORTED;
     extern const int ALTER_OF_COLUMN_IS_FORBIDDEN;

From 87eb18eb748f58e3cfbfe96d03124f3b0e04b7ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 28 Dec 2023 01:02:03 +0100
Subject: [PATCH 062/204] Speed up numbers table function

---
 .../QueryPlan/ReadFromSystemNumbersStep.cpp   | 36 +++++++++++++++----
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index 41690c1b132..a88203e0fca 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -9,6 +9,7 @@
 #include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Storages/MergeTree/KeyCondition.h>
 #include <Storages/System/StorageSystemNumbers.h>
+#include <Common/TargetSpecific.h>
 #include <Common/typeid_cast.h>
 
 namespace DB
@@ -22,6 +23,27 @@ extern const int TOO_MANY_ROWS;
 namespace
 {
 
+MULTITARGET_FUNCTION_AVX2_SSE42(
+    MULTITARGET_FUNCTION_HEADER(void),
+    iotaImpl, MULTITARGET_FUNCTION_BODY((UInt64 * begin, UInt64 count, UInt64 first_value)
+    {
+        for (UInt64 i = 0; i < count; i++)
+            *(begin + i) = first_value + i;
+    })
+)
+
+static void iota(UInt64 * begin, UInt64 count, UInt64 first_value)
+{
+#if USE_MULTITARGET_CODE
+    if (isArchSupported(TargetArch::AVX2))
+        return iotaImplAVX2(begin, count, first_value);
+
+    if (isArchSupported(TargetArch::SSE42))
+        return iotaImplSSE42(begin, count, first_value);
+#endif
+    return iotaImpl(begin, count, first_value);
+}
+
 class NumbersSource : public ISource
 {
 public:
@@ -43,8 +65,7 @@ protected:
         size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
         UInt64 * pos = vec.data(); /// This also accelerates the code.
         UInt64 * end = &vec[block_size];
-        while (pos < end)
-            *pos++ = curr++;
+        iota(pos, end - pos, curr);
 
         next += step;
 
@@ -211,17 +232,18 @@ protected:
                 {
                     auto start_value_64 = static_cast<UInt64>(start_value);
                     auto end_value_64 = static_cast<UInt64>(end_value);
-                    while (start_value_64 < end_value_64)
-                        *(pos++) = start_value_64++;
+                    auto size = end_value_64 - start_value_64;
+                    iota(pos, size, start_value_64);
+                    pos += size;
                 }
             };
 
             if (can_provide > need)
             {
                 UInt64 start_value = first_value(range) + cursor.offset_in_range;
-                UInt64 end_value = start_value + need; /// end_value will never overflow
-                while (start_value < end_value)
-                    *(pos++) = start_value++;
+                /// end_value will never overflow
+                iota(pos, need, start_value);
+                pos += need;
 
                 provided += need;
                 cursor.offset_in_range += need;

From e1a9baa5b07067a5ec2355bae772ea6fc8dd2ec0 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 28 Dec 2023 13:51:37 +0000
Subject: [PATCH 063/204] Fix

---
 src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
index 21b6a4f2aad..e3fbece37ea 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
@@ -331,6 +331,7 @@ void MergeTreeReaderCompact::readData(
 
     ISerialization::DeserializeBinaryBulkSettings deserialize_settings;
     deserialize_settings.avg_value_size_hint = avg_value_size_hints[name];
+    bool columns_cache_was_used = false;
 
     if (name_and_type.isSubcolumn())
     {
@@ -341,6 +342,7 @@ void MergeTreeReaderCompact::readData(
         if (it != columns_cache_for_subcolumns.end())
         {
             temp_column = it->second;
+            columns_cache_was_used = true;
         }
         else
         {
@@ -388,8 +390,8 @@ void MergeTreeReaderCompact::readData(
         serialization->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state, nullptr);
     }
 
-    /// The buffer is left in inconsistent state after reading single offsets
-    if (name_level_for_offsets.has_value())
+    /// The buffer is left in inconsistent state after reading single offsets or using columns cache during subcolumns reading.
+    if (name_level_for_offsets.has_value() || columns_cache_was_used)
         last_read_granule.reset();
     else
         last_read_granule.emplace(from_mark, column_position);

From 146de5b220d57fa53f1f9bf2c66742202140d807 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Tue, 5 Dec 2023 12:28:23 +0000
Subject: [PATCH 064/204] Ignore MVs with dropped target table during pushing
 to views

---
 .../Transforms/buildPushingToViewsChain.cpp   |  6 +++++-
 ...ropped_target_table_no_exception.reference |  4 ++++
 ...with_dropped_target_table_no_exception.sql | 20 +++++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference
 create mode 100644 tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql

diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp
index f85dc28f4c7..b8aafe305a8 100644
--- a/src/Processors/Transforms/buildPushingToViewsChain.cpp
+++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp
@@ -316,7 +316,11 @@ Chain buildPushingToViewsChain(
             type = QueryViewsLogElement::ViewType::MATERIALIZED;
             result_chain.addTableLock(lock);
 
-            StoragePtr inner_table = materialized_view->getTargetTable();
+            StoragePtr inner_table = materialized_view->tryGetTargetTable();
+            /// If target table was dropped, ignore this materialized view.
+            if (!inner_table)
+                continue;
+
             auto inner_table_id = inner_table->getStorageID();
             auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr();
 
diff --git a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference
new file mode 100644
index 00000000000..8fb8a08e3f9
--- /dev/null
+++ b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference
@@ -0,0 +1,4 @@
+42
+42
+42
+42
diff --git a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
new file mode 100644
index 00000000000..744b2578617
--- /dev/null
+++ b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
@@ -0,0 +1,20 @@
+drop table if exists from_table;
+drop table if exists to_table;
+drop table if exists mv;
+
+create table from_table (x UInt32) engine=MergeTree order by x;
+create table to_table (x UInt32) engine=MergeTree order by x;
+create materialized view mv to to_table as select * from from_table;
+
+insert into from_table select 42;
+select * from from_table;
+select * from to_table;
+
+drop table to_table;
+
+insert into from_table select 42;
+select * from from_table;
+
+drop table from_table;
+drop view mv;
+

From 50e9c9bb4ee7e8449cc9189a3ac5dcea043b971a Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 28 Dec 2023 14:59:33 +0000
Subject: [PATCH 065/204] Fixing tests.

---
 src/Storages/StorageMerge.cpp | 4 ++--
 src/Storages/StorageMerge.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 9bf1cc3ada8..83cb9fc058a 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -92,12 +92,12 @@ StorageMerge::DatabaseNameOrRegexp::DatabaseNameOrRegexp(
     bool database_is_regexp_,
     std::optional<OptimizedRegularExpression> source_database_regexp_,
     std::optional<OptimizedRegularExpression> source_table_regexp_,
-    const DBToTableSetMap & source_databases_and_tables_)
+    std::optional<DBToTableSetMap> source_databases_and_tables_)
     : source_database_name_or_regexp(source_database_name_or_regexp_)
     , database_is_regexp(database_is_regexp_)
     , source_database_regexp(std::move(source_database_regexp_))
     , source_table_regexp(std::move(source_table_regexp_))
-    , source_databases_and_tables(source_databases_and_tables_)
+    , source_databases_and_tables(std::move(source_databases_and_tables_))
 {
 }
 
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index 25472db206f..1fd7ff43bc2 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -103,7 +103,7 @@ private:
             bool database_is_regexp_,
             std::optional<OptimizedRegularExpression> source_database_regexp_,
             std::optional<OptimizedRegularExpression> source_table_regexp_,
-            const DBToTableSetMap & source_databases_and_tables_);
+            std::optional<DBToTableSetMap> source_databases_and_tables_);
 
         DatabaseTablesIteratorPtr getDatabaseIterator(const String & database_name, ContextPtr context) const;
 

From e66701dd101da0f446eb9b5b52a9aa48aef42a89 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 28 Dec 2023 15:00:39 +0000
Subject: [PATCH 066/204] Add setting
 ignore_materialized_views_with_dropped_target_table

---
 src/Core/Settings.h                                 |  1 +
 .../Transforms/buildPushingToViewsChain.cpp         | 13 ++++++++++++-
 src/Storages/StorageMaterializedView.h              |  1 +
 ..._race_condition_between_insert_and_droppin_mv.sh |  2 +-
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index d96b1b9fc10..9e485d88772 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -584,6 +584,7 @@ class IColumn;
     M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
     M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
     M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
+    M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped taraget table during pushing to views", 0) \
     M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
     M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
     M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp
index b8aafe305a8..ab9b3a80f12 100644
--- a/src/Processors/Transforms/buildPushingToViewsChain.cpp
+++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp
@@ -39,6 +39,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int UNKNOWN_TABLE;
 }
 
 ThreadStatusesHolder::~ThreadStatusesHolder()
@@ -319,7 +320,17 @@ Chain buildPushingToViewsChain(
             StoragePtr inner_table = materialized_view->tryGetTargetTable();
             /// If target table was dropped, ignore this materialized view.
             if (!inner_table)
-                continue;
+            {
+                if (context->getSettingsRef().ignore_materialized_views_with_dropped_target_table)
+                    continue;
+
+                throw Exception(
+                    ErrorCodes::UNKNOWN_TABLE,
+                    "Target table '{}' of view '{}' doesn't exists. To ignore this view use setting "
+                    "ignore_materialized_views_with_dropped_target_table",
+                    materialized_view->getTargetTableId().getFullTableName(),
+                    view_id.getFullTableName());
+            }
 
             auto inner_table_id = inner_table->getStorageID();
             auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr();
diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index f37abdfb1a3..8d7f3e5a9a7 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -71,6 +71,7 @@ public:
 
     StoragePtr getTargetTable() const;
     StoragePtr tryGetTargetTable() const;
+    const StorageID & getTargetTableId() const { return target_table_id; }
 
     /// Get the virtual column of the target table;
     NamesAndTypesList getVirtuals() const override;
diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
index 9ce4b459fce..6899b31d1d9 100755
--- a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
+++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
@@ -14,7 +14,7 @@ function insert {
     offset=500
     while true;
     do
-        ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_race_condition_landing SELECT number, toString(number), toString(number) from system.numbers limit $i, $offset"
+        ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_race_condition_landing SELECT number, toString(number), toString(number) from system.numbers limit $i, $offset settings ignore_materialized_views_with_dropped_target_table=1"
         i=$(( $i + $RANDOM % 100 + 400 ))
     done
 }

From 0faf784d2f39e25396dba32a1667814fdce7f850 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Thu, 28 Dec 2023 16:59:57 +0000
Subject: [PATCH 067/204] Add a test for alias in USING clause

---
 .../02955_analyzer_using_functional_args.reference    |  1 +
 .../02955_analyzer_using_functional_args.sql          | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
 create mode 100644 tests/queries/0_stateless/02955_analyzer_using_functional_args.sql

diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
new file mode 100644
index 00000000000..e4c1fd86b09
--- /dev/null
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
@@ -0,0 +1,11 @@
+CREATE TABLE t1 (x Int16, y ALIAS x + x * 2) ENGINE=MergeTree() ORDER BY x;
+CREATE TABLE t2 (y Int16, z Int16) ENGINE=MergeTree() ORDER BY y;
+
+INSERT INTO t1 VALUES (1231), (123);
+INSERT INTO t2 VALUES (6666, 48);
+INSERT INTO t2 VALUES (369, 50);
+
+SELECT count() FROM t1 INNER JOIN t2 USING (y);
+
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;

From d7a473e3863b082eca1750328487180892ea16ee Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 28 Dec 2023 17:34:28 +0000
Subject: [PATCH 068/204] Fix some test.

---
 src/Interpreters/ActionsDAG.h       | 1 +
 src/Storages/VirtualColumnUtils.cpp | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index 94b6b1ac41d..f18ae5d5c75 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -115,6 +115,7 @@ public:
     explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_);
 
     const Nodes & getNodes() const { return nodes; }
+    static Nodes detachNodes(ActionsDAG && dag) { return std::move(dag.nodes); }
     const NodeRawConstPtrs & getOutputs() const { return outputs; }
     /** Output nodes can contain any column returned from DAG.
       * You may manually change it if needed.
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index aed06fb0540..f5284eeb21b 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -504,7 +504,11 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
                 /// at least two arguments; also it can't be reduced to (256) because result type is different.
                 /// TODO: add CAST here
                 if (!res->result_type->equals(*node->result_type))
-                    return nullptr;
+                {
+                    ActionsDAG tmp_dag;
+                    res = &tmp_dag.addCast(*res, node->result_type, {});
+                    additional_nodes.splice(additional_nodes.end(), ActionsDAG::detachNodes(std::move(tmp_dag)));
+                }
 
                 return res;
             }

From 4c68716df75ca08382cdb037f286f1efaf11a56e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 28 Dec 2023 17:51:11 +0000
Subject: [PATCH 069/204] Fix another test.

---
 src/Storages/StorageMerge.cpp | 7 +++++++
 src/Storages/StorageMerge.h   | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 83cb9fc058a..bf9dfc37440 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -51,6 +51,7 @@
 #include <Common/assert_cast.h>
 #include <Common/checkStackSize.h>
 #include <Common/typeid_cast.h>
+#include "Parsers/queryToString.h"
 
 namespace
 {
@@ -1191,6 +1192,12 @@ void ReadFromMerge::convertAndFilterSourceStream(
     });
 }
 
+const ReadFromMerge::StorageListWithLocks & ReadFromMerge::getSelectedTables()
+{
+    filterTablesAndCreateChildPlans();
+    return selected_tables;
+}
+
 bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_)
 {
     filterTablesAndCreateChildPlans();
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index 1fd7ff43bc2..cd3470af298 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -156,7 +156,7 @@ public:
 
     void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
 
-    const StorageListWithLocks & getSelectedTables() const { return selected_tables; }
+    const StorageListWithLocks & getSelectedTables();
 
     /// Returns `false` if requested reading cannot be performed.
     bool requestReadingInOrder(InputOrderInfoPtr order_info_);

From 490a8bce9ee5b711c61b84d6ec69cc307c17d3aa Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 28 Dec 2023 18:01:08 +0000
Subject: [PATCH 070/204] Remove commented code.

---
 src/Storages/StorageMerge.cpp       | 32 -----------------------------
 src/Storages/StorageMerge.h         |  6 ------
 src/Storages/VirtualColumnUtils.cpp |  1 -
 3 files changed, 39 deletions(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index bf9dfc37440..fb208b64c78 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -317,28 +317,6 @@ void StorageMerge::read(
       */
     auto modified_context = Context::createCopy(local_context);
     modified_context->setSetting("optimize_move_to_prewhere", false);
-
-
-    // InputOrderInfoPtr input_sorting_info;
-    // if (query_info.order_optimizer)
-    // {
-    //     for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
-    //     {
-    //         auto storage_ptr = std::get<1>(*it);
-    //         auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
-    //         auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot, modified_context);
-    //         if (it == selected_tables.begin())
-    //             input_sorting_info = current_info;
-    //         else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))
-    //             input_sorting_info.reset();
-
-    //         if (!input_sorting_info)
-    //             break;
-    //     }
-
-    //     query_info.input_order_info = input_sorting_info;
-    // }
-
     query_plan.addInterpreterContext(modified_context);
 
     /// What will be result structure depending on query processed stage in source tables?
@@ -346,11 +324,7 @@ void StorageMerge::read(
 
     auto step = std::make_unique<ReadFromMerge>(
         common_header,
-        //std::move(selected_tables),
-        //real_column_names,
         column_names,
-        // has_database_virtual_column,
-        // has_table_virtual_column,
         max_block_size,
         num_streams,
         shared_from_this(),
@@ -364,10 +338,7 @@ void StorageMerge::read(
 
 ReadFromMerge::ReadFromMerge(
     Block common_header_,
-    //StorageListWithLocks selected_tables_,
     Names all_column_names_,
-    // bool has_database_virtual_column_,
-    // bool has_table_virtual_column_,
     size_t max_block_size,
     size_t num_streams,
     StoragePtr storage,
@@ -379,10 +350,7 @@ ReadFromMerge::ReadFromMerge(
     , required_max_block_size(max_block_size)
     , requested_num_streams(num_streams)
     , common_header(std::move(common_header_))
-    //, selected_tables(std::move(selected_tables_))
     , all_column_names(std::move(all_column_names_))
-    // , has_database_virtual_column(has_database_virtual_column_)
-    // , has_table_virtual_column(has_table_virtual_column_)
     , storage_merge(std::move(storage))
     , merge_storage_snapshot(std::move(storage_snapshot))
     , query_info(query_info_)
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index cd3470af298..95c373eab3b 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -144,8 +144,6 @@ public:
     ReadFromMerge(
         Block common_header_,
         Names all_column_names_,
-        // bool has_database_virtual_column_,
-        // bool has_table_virtual_column_,
         size_t max_block_size,
         size_t num_streams,
         StoragePtr storage,
@@ -223,10 +221,6 @@ private:
 
     using RowPolicyDataOpt = std::optional<RowPolicyData>;
 
-    // std::vector<Aliases> table_aliases;
-
-    // std::vector<RowPolicyDataOpt> table_row_policy_data_opts;
-
     struct ChildPlan
     {
         QueryPlan plan;
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index f5284eeb21b..8da5fdb3709 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -502,7 +502,6 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
                 const ActionsDAG::Node * res = node_copy.children.front();
                 /// Expression like (not_allowed AND 256) can't be resuced to (and(256)) because AND requires
                 /// at least two arguments; also it can't be reduced to (256) because result type is different.
-                /// TODO: add CAST here
                 if (!res->result_type->equals(*node->result_type))
                 {
                     ActionsDAG tmp_dag;

From 8a90f12dc9974c543d65a72653b565758c7e128c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 28 Dec 2023 23:28:26 +0100
Subject: [PATCH 071/204] Speedup MIN/MAX for non numeric types

---
 .../AggregateFunctionMax.cpp                  | 83 ++++++++++++++++++-
 .../AggregateFunctionMin.cpp                  | 83 ++++++++++++++++++-
 2 files changed, 158 insertions(+), 8 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index e74224a24c3..a440aedb62c 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -19,7 +19,7 @@ public:
     explicit AggregateFunctionsSingleValueMax(const DataTypePtr & type) : Parent(type) { }
 
     /// Specializations for native numeric types
-    ALWAYS_INLINE inline void addBatchSinglePlace(
+    void addBatchSinglePlace(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -27,7 +27,7 @@ public:
         Arena * arena,
         ssize_t if_argument_pos) const override;
 
-    ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
+    void addBatchSinglePlaceNotNull(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -74,7 +74,50 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin; /// Row of the best candidate among the rows accepted by the -If filter
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while (index < row_end && if_flags[index] == 0) /// Bound check first, so if_flags[row_end] is never read
+            index++;
+        if (index >= row_end) /// No row passed the filter: leave the state untouched
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+                index = i;
+        }
+        this->data(place).changeIfGreater(column, index, arena); /// Single state update with the batch maximum
+    }
+    else
+    {
+        if (row_begin >= row_end)
+            return;
+
+        /// TODO: Introduce row_begin and row_end to getPermutation
+        if (row_begin != 0 || row_end != column.size())
+        {
+            size_t index = row_begin;
+            for (size_t i = index + 1; i < row_end; i++)
+            {
+                if (column.compareAt(i, index, column, nan_direction_hint) > 0)
+                    index = i;
+            }
+            this->data(place).changeIfGreater(column, index, arena);
+        }
+        else
+        {
+            constexpr IColumn::PermutationSortDirection direction = IColumn::PermutationSortDirection::Descending;
+            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
+            IColumn::Permutation permutation;
+            constexpr UInt64 limit = 1;
+            column.getPermutation(direction, stability, limit, nan_direction_hint, permutation);
+            this->data(place).changeIfGreater(column, permutation[0], arena);
+        }
+    }
 }
 
 // NOLINTBEGIN(bugprone-macro-parentheses)
@@ -119,7 +162,39 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin; /// Row of the best candidate among non-NULL rows accepted by the -If filter
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while ((index < row_end) && (if_flags[index] == 0 || null_map[index] != 0)) /// Bound check before indexing
+            index++;
+        if (index >= row_end) /// Every row is filtered out or NULL: leave the state untouched
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+                index = i;
+        }
+        this->data(place).changeIfGreater(column, index, arena);
+    }
+    else
+    {
+        size_t index = row_begin; /// Row of the best candidate among non-NULL rows
+        while ((index < row_end) && (null_map[index] != 0)) /// Bound check before indexing
+            index++;
+        if (index >= row_end) /// Every row is NULL: leave the state untouched
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+                index = i;
+        }
+        this->data(place).changeIfGreater(column, index, arena);
+    }
 }
 
 AggregateFunctionPtr createAggregateFunctionMax(
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 48758aa74b0..8d5d12fa626 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -20,7 +20,7 @@ public:
     explicit AggregateFunctionsSingleValueMin(const DataTypePtr & type) : Parent(type) { }
 
     /// Specializations for native numeric types
-    ALWAYS_INLINE inline void addBatchSinglePlace(
+    void addBatchSinglePlace(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -28,7 +28,7 @@ public:
         Arena * arena,
         ssize_t if_argument_pos) const override;
 
-    ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
+    void addBatchSinglePlaceNotNull(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -75,7 +75,50 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin; /// Row of the best candidate among the rows accepted by the -If filter
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while (index < row_end && if_flags[index] == 0) /// Bound check first, so if_flags[row_end] is never read
+            index++;
+        if (index >= row_end) /// No row passed the filter: leave the state untouched
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+                index = i;
+        }
+        this->data(place).changeIfLess(column, index, arena); /// Single state update with the batch minimum
+    }
+    else
+    {
+        if (row_begin >= row_end)
+            return;
+
+        /// TODO: Introduce row_begin and row_end to getPermutation
+        if (row_begin != 0 || row_end != column.size())
+        {
+            size_t index = row_begin;
+            for (size_t i = index + 1; i < row_end; i++)
+            {
+                if (column.compareAt(i, index, column, nan_direction_hint) < 0)
+                    index = i;
+            }
+            this->data(place).changeIfLess(column, index, arena);
+        }
+        else
+        {
+            constexpr IColumn::PermutationSortDirection direction = IColumn::PermutationSortDirection::Ascending;
+            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
+            IColumn::Permutation permutation;
+            constexpr UInt64 limit = 1;
+            column.getPermutation(direction, stability, limit, nan_direction_hint, permutation);
+            this->data(place).changeIfLess(column, permutation[0], arena);
+        }
+    }
 }
 
 // NOLINTBEGIN(bugprone-macro-parentheses)
@@ -120,7 +163,39 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin; /// Row of the best candidate among non-NULL rows accepted by the -If filter
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while ((index < row_end) && (if_flags[index] == 0 || null_map[index] != 0)) /// Bound check before indexing
+            index++;
+        if (index >= row_end) /// Every row is filtered out or NULL: leave the state untouched
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0)) /// Test row i for NULL, not the current best
+                index = i;
+        }
+        this->data(place).changeIfLess(column, index, arena);
+    }
+    else
+    {
+        size_t index = row_begin; /// Row of the best candidate among non-NULL rows
+        while ((index < row_end) && (null_map[index] != 0)) /// Bound check before indexing
+            index++;
+        if (index >= row_end) /// Every row is NULL: leave the state untouched
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+                index = i;
+        }
+        this->data(place).changeIfLess(column, index, arena);
+    }
 }
 
 AggregateFunctionPtr createAggregateFunctionMin(

From 062168c17797d3f5164a47d47fbee5d37f1b35dd Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 29 Dec 2023 09:38:10 +0000
Subject: [PATCH 072/204] Fix a test.

---
 .../0_stateless/00578_merge_table_and_table_virtual_column.sql  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql b/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql
index c2bc334ea38..f292eb30648 100644
--- a/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql
+++ b/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql
@@ -13,6 +13,8 @@ CREATE TABLE numbers5 ENGINE = MergeTree ORDER BY number AS SELECT number FROM n
 SELECT count() FROM merge(currentDatabase(), '^numbers\\d+$');
 SELECT DISTINCT count() FROM merge(currentDatabase(), '^numbers\\d+$') GROUP BY number;
 
+SET optimize_aggregation_in_order = 0; -- FIXME : in order may happen before filter push down
+
 SET max_rows_to_read = 1000;
 
 SET max_threads = 'auto';

From e692b0a5bda00d14e109aeee1f1045b553183b10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 13:46:01 +0100
Subject: [PATCH 073/204] Move iota implementation to its own file

---
 src/Common/iota.cpp                           |  6 +++
 src/Common/iota.h                             | 42 +++++++++++++++++++
 .../QueryPlan/ReadFromSystemNumbersStep.cpp   | 29 ++-----------
 3 files changed, 52 insertions(+), 25 deletions(-)
 create mode 100644 src/Common/iota.cpp
 create mode 100644 src/Common/iota.h

diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp
new file mode 100644
index 00000000000..7c0d28a66e0
--- /dev/null
+++ b/src/Common/iota.cpp
@@ -0,0 +1,6 @@
+#include <Common/iota.h>
+
+namespace DB
+{
+template void iota(UInt64 * begin, size_t count, UInt64 first_value);
+}
diff --git a/src/Common/iota.h b/src/Common/iota.h
new file mode 100644
index 00000000000..d992032b77c
--- /dev/null
+++ b/src/Common/iota.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <base/defines.h>
+#include <base/types.h>
+#include <Common/Concepts.h>
+#include <Common/TargetSpecific.h>
+
+/// This is a replacement for std::iota to use dynamic dispatch
+/// Note that is only defined for containers with contiguous memory only
+
+namespace DB
+{
+
+/// Make sure to add any new type to the extern declaration at the end of the file and instantiate it in iota.cpp
+template <typename T>
+concept iota_supported_types = (is_any_of<T, UInt64>);
+
+MULTITARGET_FUNCTION_AVX2_SSE42(
+    MULTITARGET_FUNCTION_HEADER(template <iota_supported_types T> void NO_INLINE),
+    iotaImpl, MULTITARGET_FUNCTION_BODY((T * begin, size_t count, T first_value) /// NOLINT
+    {
+        for (size_t i = 0; i < count; i++)
+            *(begin + i) = first_value + i;
+    })
+)
+
+template <iota_supported_types T>
+void iota(T * begin, size_t count, T first_value)
+{
+#if USE_MULTITARGET_CODE
+    if (isArchSupported(TargetArch::AVX2))
+        return iotaImplAVX2(begin, count, first_value);
+
+    if (isArchSupported(TargetArch::SSE42))
+        return iotaImplSSE42(begin, count, first_value);
+#endif
+    return iotaImpl(begin, count, first_value);
+}
+
+extern template void iota(UInt64 * begin, size_t count, UInt64 first_value);
+
+}
diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index a88203e0fca..329497d66d3 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -9,7 +9,7 @@
 #include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Storages/MergeTree/KeyCondition.h>
 #include <Storages/System/StorageSystemNumbers.h>
-#include <Common/TargetSpecific.h>
+#include <Common/iota.h>
 #include <Common/typeid_cast.h>
 
 namespace DB
@@ -23,27 +23,6 @@ extern const int TOO_MANY_ROWS;
 namespace
 {
 
-MULTITARGET_FUNCTION_AVX2_SSE42(
-    MULTITARGET_FUNCTION_HEADER(void),
-    iotaImpl, MULTITARGET_FUNCTION_BODY((UInt64 * begin, UInt64 count, UInt64 first_value)
-    {
-        for (UInt64 i = 0; i < count; i++)
-            *(begin + i) = first_value + i;
-    })
-)
-
-static void iota(UInt64 * begin, UInt64 count, UInt64 first_value)
-{
-#if USE_MULTITARGET_CODE
-    if (isArchSupported(TargetArch::AVX2))
-        return iotaImplAVX2(begin, count, first_value);
-
-    if (isArchSupported(TargetArch::SSE42))
-        return iotaImplSSE42(begin, count, first_value);
-#endif
-    return iotaImpl(begin, count, first_value);
-}
-
 class NumbersSource : public ISource
 {
 public:
@@ -65,7 +44,7 @@ protected:
         size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
         UInt64 * pos = vec.data(); /// This also accelerates the code.
         UInt64 * end = &vec[block_size];
-        iota(pos, end - pos, curr);
+        iota(pos, static_cast<size_t>(end - pos), curr);
 
         next += step;
 
@@ -233,7 +212,7 @@ protected:
                     auto start_value_64 = static_cast<UInt64>(start_value);
                     auto end_value_64 = static_cast<UInt64>(end_value);
                     auto size = end_value_64 - start_value_64;
-                    iota(pos, size, start_value_64);
+                    iota(pos, static_cast<size_t>(size), start_value_64);
                     pos += size;
                 }
             };
@@ -242,7 +221,7 @@ protected:
             {
                 UInt64 start_value = first_value(range) + cursor.offset_in_range;
                 /// end_value will never overflow
-                iota(pos, need, start_value);
+                iota(pos, static_cast<size_t>(need), start_value);
                 pos += need;
 
                 provided += need;

From bda6104f84bdfce53115a728cd2e9d2f3251bc66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 14:38:22 +0100
Subject: [PATCH 074/204] Replace std::iota with DB::iota where possible

---
 ...ateFunctionLargestTriangleThreeBuckets.cpp |  5 ++--
 src/AggregateFunctions/StatCommon.h           |  3 +-
 src/Analyzer/Passes/FuseFunctionsPass.cpp     |  3 +-
 src/Columns/ColumnObject.cpp                  |  3 +-
 src/Columns/tests/gtest_column_sparse.cpp     |  3 +-
 src/Common/iota.cpp                           | 27 ++++++++++++++++++
 src/Common/iota.h                             | 28 +++----------------
 src/Common/tests/gtest_hash_table.cpp         |  3 +-
 .../HashedDictionaryParallelLoader.h          |  3 +-
 src/Dictionaries/PolygonDictionary.cpp        |  3 +-
 src/Dictionaries/PolygonDictionaryUtils.h     |  3 +-
 src/Functions/array/arrayRandomSample.cpp     |  3 +-
 src/Functions/array/arrayShuffle.cpp          |  3 +-
 src/Functions/translate.cpp                   |  5 ++--
 src/Interpreters/tests/gtest_filecache.cpp    |  3 +-
 15 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp
index 850a7c688ad..d5abdbc12fb 100644
--- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp
+++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp
@@ -14,8 +14,9 @@
 #include <DataTypes/DataTypesDecimal.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <IO/ReadHelpers.h>
-#include <Common/PODArray.h>
 #include <Common/assert_cast.h>
+#include <Common/PODArray.h>
+#include <Common/iota.h>
 #include <base/types.h>
 
 #include <boost/math/distributions/normal.hpp>
@@ -48,7 +49,7 @@ struct LargestTriangleThreeBucketsData : public StatisticalSample<Float64, Float
         // sort the this->x and this->y in ascending order of this->x using index
         std::vector<size_t> index(this->x.size());
 
-        std::iota(index.begin(), index.end(), 0);
+        iota(index.data(), index.size(), size_t(0));
         ::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; });
 
         SampleX temp_x{};
diff --git a/src/AggregateFunctions/StatCommon.h b/src/AggregateFunctions/StatCommon.h
index 23054e25189..8b1395ea95c 100644
--- a/src/AggregateFunctions/StatCommon.h
+++ b/src/AggregateFunctions/StatCommon.h
@@ -7,6 +7,7 @@
 #include <base/sort.h>
 
 #include <Common/ArenaAllocator.h>
+#include <Common/iota.h>
 
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
@@ -30,7 +31,7 @@ std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & value
     const size_t size = values.size();
     /// Save initial positions, than sort indices according to the values.
     std::vector<size_t> indexes(size);
-    std::iota(indexes.begin(), indexes.end(), 0);
+    iota(indexes.data(), indexes.size(), size_t(0));
     std::sort(indexes.begin(), indexes.end(),
         [&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
 
diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp
index e77b3ddcb20..443e13b7d9d 100644
--- a/src/Analyzer/Passes/FuseFunctionsPass.cpp
+++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp
@@ -1,5 +1,6 @@
 #include <Analyzer/Passes/FuseFunctionsPass.h>
 
+#include <Common/iota.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeTuple.h>
@@ -184,7 +185,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector<QueryTreeNodePtr *> & nodes
     {
         /// Sort nodes and parameters in ascending order of quantile level
         std::vector<size_t> permutation(nodes.size());
-        std::iota(permutation.begin(), permutation.end(), 0);
+        iota(permutation.data(), permutation.size(), size_t(0));
         std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].get<Float64>() < parameters[j].get<Float64>(); });
 
         std::vector<QueryTreeNodePtr *> new_nodes;
diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index 2052ec3c968..f7176568a1b 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -2,6 +2,7 @@
 #include <Columns/ColumnObject.h>
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnArray.h>
+#include <Common/iota.h>
 #include <DataTypes/ObjectUtils.h>
 #include <DataTypes/getLeastSupertype.h>
 #include <DataTypes/DataTypeNothing.h>
@@ -838,7 +839,7 @@ MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const
 void ColumnObject::getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const
 {
     res.resize(num_rows);
-    std::iota(res.begin(), res.end(), 0);
+    iota(res.data(), res.size(), size_t(0));
 }
 
 void ColumnObject::compareColumn(const IColumn & rhs, size_t rhs_row_num,
diff --git a/src/Columns/tests/gtest_column_sparse.cpp b/src/Columns/tests/gtest_column_sparse.cpp
index c3450ff91b4..02b15a2f5c4 100644
--- a/src/Columns/tests/gtest_column_sparse.cpp
+++ b/src/Columns/tests/gtest_column_sparse.cpp
@@ -1,6 +1,7 @@
 #include <Columns/ColumnSparse.h>
 #include <Columns/ColumnsNumber.h>
 
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <pcg_random.hpp>
 #include <gtest/gtest.h>
@@ -191,7 +192,7 @@ TEST(ColumnSparse, Permute)
         auto [sparse_src, full_src] = createColumns(n, k);
 
         IColumn::Permutation perm(n);
-        std::iota(perm.begin(), perm.end(), 0);
+        iota(perm.data(), perm.size(), size_t(0));
         std::shuffle(perm.begin(), perm.end(), rng);
 
         auto sparse_dst = sparse_src->permute(perm, limit);
diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp
index 7c0d28a66e0..385d3b22207 100644
--- a/src/Common/iota.cpp
+++ b/src/Common/iota.cpp
@@ -1,6 +1,33 @@
+#include <base/defines.h>
 #include <Common/iota.h>
+#include <Common/TargetSpecific.h>
 
 namespace DB
 {
+
+MULTITARGET_FUNCTION_AVX2_SSE42(
+    MULTITARGET_FUNCTION_HEADER(template <iota_supported_types T> void NO_INLINE),
+    iotaImpl, MULTITARGET_FUNCTION_BODY((T * begin, size_t count, T first_value) /// NOLINT
+    {
+        for (size_t i = 0; i < count; i++)
+            *(begin + i) = static_cast<T>(first_value + i);
+    })
+)
+
+template <iota_supported_types T>
+void iota(T * begin, size_t count, T first_value)
+{
+#if USE_MULTITARGET_CODE
+    if (isArchSupported(TargetArch::AVX2))
+        return iotaImplAVX2(begin, count, first_value);
+
+    if (isArchSupported(TargetArch::SSE42))
+        return iotaImplSSE42(begin, count, first_value);
+#endif
+    return iotaImpl(begin, count, first_value);
+}
+
+template void iota(UInt8 * begin, size_t count, UInt8 first_value);
+template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 template void iota(UInt64 * begin, size_t count, UInt64 first_value);
 }
diff --git a/src/Common/iota.h b/src/Common/iota.h
index d992032b77c..485df4bd4f0 100644
--- a/src/Common/iota.h
+++ b/src/Common/iota.h
@@ -1,9 +1,7 @@
 #pragma once
 
-#include <base/defines.h>
 #include <base/types.h>
 #include <Common/Concepts.h>
-#include <Common/TargetSpecific.h>
 
 /// This is a replacement for std::iota to use dynamic dispatch
 /// Note that is only defined for containers with contiguous memory only
@@ -13,30 +11,12 @@ namespace DB
 
 /// Make sure to add any new type to the extern declaration at the end of the file and instantiate it in iota.cpp
 template <typename T>
-concept iota_supported_types = (is_any_of<T, UInt64>);
+concept iota_supported_types = (is_any_of<T, UInt8, UInt32, UInt64>);
 
-MULTITARGET_FUNCTION_AVX2_SSE42(
-    MULTITARGET_FUNCTION_HEADER(template <iota_supported_types T> void NO_INLINE),
-    iotaImpl, MULTITARGET_FUNCTION_BODY((T * begin, size_t count, T first_value) /// NOLINT
-    {
-        for (size_t i = 0; i < count; i++)
-            *(begin + i) = first_value + i;
-    })
-)
-
-template <iota_supported_types T>
-void iota(T * begin, size_t count, T first_value)
-{
-#if USE_MULTITARGET_CODE
-    if (isArchSupported(TargetArch::AVX2))
-        return iotaImplAVX2(begin, count, first_value);
-
-    if (isArchSupported(TargetArch::SSE42))
-        return iotaImplSSE42(begin, count, first_value);
-#endif
-    return iotaImpl(begin, count, first_value);
-}
+template <iota_supported_types T> void iota(T * begin, size_t count, T first_value);
 
+extern template void iota(UInt8 * begin, size_t count, UInt8 first_value);
+extern template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 extern template void iota(UInt64 * begin, size_t count, UInt64 first_value);
 
 }
diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp
index 72941126cfd..ab7c3872170 100644
--- a/src/Common/tests/gtest_hash_table.cpp
+++ b/src/Common/tests/gtest_hash_table.cpp
@@ -6,6 +6,7 @@
 #include <Common/HashTable/HashMap.h>
 #include <Common/HashTable/HashSet.h>
 #include <Common/HashTable/Hash.h>
+#include <Common/iota.h>
 
 #include <IO/ReadBufferFromString.h>
 #include <IO/WriteHelpers.h>
@@ -20,7 +21,7 @@ namespace
 std::vector<UInt64> getVectorWithNumbersUpToN(size_t n)
 {
     std::vector<UInt64> res(n);
-    std::iota(res.begin(), res.end(), 0);
+    iota(res.data(), res.size(), size_t(0));
     return res;
 }
 
diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h
index 907a987555e..ec892af7e36 100644
--- a/src/Dictionaries/HashedDictionaryParallelLoader.h
+++ b/src/Dictionaries/HashedDictionaryParallelLoader.h
@@ -2,6 +2,7 @@
 
 #include <Dictionaries/IDictionary.h>
 #include <Common/CurrentThread.h>
+#include <Common/iota.h>
 #include <Common/scope_guard_safe.h>
 #include <Common/ConcurrentBoundedQueue.h>
 #include <Common/ThreadPool.h>
@@ -53,7 +54,7 @@ public:
         LOG_TRACE(dictionary.log, "Will load the dictionary using {} threads (with {} backlog)", shards, backlog);
 
         shards_slots.resize(shards);
-        std::iota(shards_slots.begin(), shards_slots.end(), 0);
+        iota(shards_slots.data(), shards_slots.size(), UInt64(0));
 
         for (size_t shard = 0; shard < shards; ++shard)
         {
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index df3ae439b00..6f800bd921d 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -5,6 +5,7 @@
 
 #include <base/sort.h>
 
+#include <Common/iota.h>
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnTuple.h>
 #include <DataTypes/DataTypeArray.h>
@@ -507,7 +508,7 @@ const IColumn * unrollSimplePolygons(const ColumnPtr & column, Offset & offset)
     if (!ptr_polygons)
         throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected a column containing arrays of points");
     offset.ring_offsets.assign(ptr_polygons->getOffsets());
-    std::iota(offset.polygon_offsets.begin(), offset.polygon_offsets.end(), 1);
+    iota<IColumn::Offsets::value_type>(offset.polygon_offsets.data(), offset.polygon_offsets.size(), IColumn::Offsets::value_type(1));
     offset.multi_polygon_offsets.assign(offset.polygon_offsets);
 
     return ptr_polygons->getDataPtr().get();
diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h
index 0238ef0b2b9..63d97e9dabd 100644
--- a/src/Dictionaries/PolygonDictionaryUtils.h
+++ b/src/Dictionaries/PolygonDictionaryUtils.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <base/types.h>
+#include <Common/iota.h>
 #include <Common/ThreadPool.h>
 #include <Poco/Logger.h>
 
@@ -184,7 +185,7 @@ public:
     {
         setBoundingBox();
         std::vector<size_t> order(polygons.size());
-        std::iota(order.begin(), order.end(), 0);
+        iota(order.data(), order.size(), size_t(0));
         root = makeCell(min_x, min_y, max_x, max_y, order);
     }
 
diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp
index 1e28e089a2a..40344efb077 100644
--- a/src/Functions/array/arrayRandomSample.cpp
+++ b/src/Functions/array/arrayRandomSample.cpp
@@ -1,5 +1,6 @@
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnsNumber.h>
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <DataTypes/DataTypeArray.h>
 #include <Functions/FunctionFactory.h>
@@ -80,7 +81,7 @@ public:
             const size_t cur_samples = std::min(num_elements, samples);
 
             indices.resize(num_elements);
-            std::iota(indices.begin(), indices.end(), prev_array_offset);
+            iota(indices.data(), indices.size(), prev_array_offset);
             std::shuffle(indices.begin(), indices.end(), rng);
 
             for (UInt64 i = 0; i < cur_samples; i++)
diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp
index faa5ae47b29..10cb51d27d2 100644
--- a/src/Functions/array/arrayShuffle.cpp
+++ b/src/Functions/array/arrayShuffle.cpp
@@ -7,6 +7,7 @@
 #include <Functions/FunctionHelpers.h>
 #include <Functions/IFunction.h>
 #include <Common/assert_cast.h>
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <Common/shuffle.h>
 #include <Common/typeid_cast.h>
@@ -150,7 +151,7 @@ ColumnPtr FunctionArrayShuffleImpl<Traits>::executeGeneric(const ColumnArray & a
     size_t size = offsets.size();
     size_t nested_size = array.getData().size();
     IColumn::Permutation permutation(nested_size);
-    std::iota(std::begin(permutation), std::end(permutation), 0);
+    iota(permutation.data(), permutation.size(), IColumn::Permutation::value_type(0));
 
     ColumnArray::Offset current_offset = 0;
     for (size_t i = 0; i < size; ++i)
diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp
index 836cb4de2f3..ad5be7d9dfd 100644
--- a/src/Functions/translate.cpp
+++ b/src/Functions/translate.cpp
@@ -3,6 +3,7 @@
 #include <Columns/ColumnConst.h>
 #include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
+#include <Common/iota.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/UTF8Helpers.h>
 #include <Common/HashTable/HashMap.h>
@@ -31,7 +32,7 @@ struct TranslateImpl
         if (map_from.size() != map_to.size())
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments must be the same length");
 
-        std::iota(map.begin(), map.end(), 0);
+        iota(map.data(), map.size(), UInt8(0));
 
         for (size_t i = 0; i < map_from.size(); ++i)
         {
@@ -129,7 +130,7 @@ struct TranslateUTF8Impl
         if (map_from_size != map_to_size)
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments must be the same length");
 
-        std::iota(map_ascii.begin(), map_ascii.end(), 0);
+        iota(map_ascii.data(), map_ascii.size(), UInt32(0));
 
         const UInt8 * map_from_ptr = reinterpret_cast<const UInt8 *>(map_from.data());
         const UInt8 * map_from_end = map_from_ptr + map_from.size();
diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp
index 1005e6090b8..3e061db4f56 100644
--- a/src/Interpreters/tests/gtest_filecache.cpp
+++ b/src/Interpreters/tests/gtest_filecache.cpp
@@ -11,6 +11,7 @@
 #include <memory>
 #include <thread>
 
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <IO/ReadHelpers.h>
@@ -788,7 +789,7 @@ TEST_F(FileCacheTest, writeBuffer)
 
         /// get random permutation of indexes
         std::vector<size_t> indexes(data.size());
-        std::iota(indexes.begin(), indexes.end(), 0);
+        iota(indexes.data(), indexes.size(), size_t(0));
         std::shuffle(indexes.begin(), indexes.end(), rng);
 
         for (auto i : indexes)

From bfc10bd234f2791fd48d30437e76df7d4a304a44 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Fri, 29 Dec 2023 15:16:12 +0100
Subject: [PATCH 075/204] an option to avoid waiting for inactive Replicated db
 replicas

---
 docs/en/operations/settings/settings.md       |  2 +
 src/Core/SettingsEnums.cpp                    |  2 +
 src/Core/SettingsEnums.h                      |  2 +
 src/Interpreters/executeDDLQueryOnCluster.cpp | 98 ++++++++++++++-----
 .../test_replicated_database/test.py          |  2 +-
 .../test.py                                   |  2 +-
 ...distributed_ddl_output_mode_long.reference |  4 +-
 .../02447_drop_database_replica.reference     |  8 ++
 .../02447_drop_database_replica.sh            |  3 +
 9 files changed, 96 insertions(+), 27 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 6e087467bb9..d4ee8106320 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3847,6 +3847,8 @@ Possible values:
 - `none` — Is similar to throw, but distributed DDL query returns no result set.
 - `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts.
 - `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts.
+- `null_status_on_timeout_only_active` — Similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database.
+- `throw_only_active` — Similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database.
 
 Default value: `throw`.
 
diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp
index c35e69977ed..2e6bb51176d 100644
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@@ -113,6 +113,8 @@ IMPLEMENT_SETTING_ENUM(DistributedDDLOutputMode, ErrorCodes::BAD_ARGUMENTS,
     {{"none",         DistributedDDLOutputMode::NONE},
      {"throw",    DistributedDDLOutputMode::THROW},
      {"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT},
+     {"throw_only_active", DistributedDDLOutputMode::THROW_ONLY_ACTIVE},
+     {"null_status_on_timeout_only_active", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE},
      {"never_throw", DistributedDDLOutputMode::NEVER_THROW}})
 
 IMPLEMENT_SETTING_ENUM(StreamingHandleErrorMode, ErrorCodes::BAD_ARGUMENTS,
diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h
index 2e71c96b954..0b2d47210a8 100644
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@@ -165,6 +165,8 @@ enum class DistributedDDLOutputMode
     THROW,
     NULL_STATUS_ON_TIMEOUT,
     NEVER_THROW,
+    THROW_ONLY_ACTIVE,
+    NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE,
 };
 
 DECLARE_SETTING_ENUM(DistributedDDLOutputMode)
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index 9486350a0f6..ba7638cd83f 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -200,8 +200,6 @@ public:
     Status prepare() override;
 
 private:
-    static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);
-
     static Block getSampleBlock(ContextPtr context_, bool hosts_to_wait);
 
     Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);
@@ -228,7 +226,8 @@ private:
     NameSet waiting_hosts;  /// hosts from task host list
     NameSet finished_hosts; /// finished hosts from host list
     NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
-    Strings current_active_hosts; /// Hosts that were in active state at the last check
+    Strings current_active_hosts; /// Hosts that are currently executing the task
+    NameSet offline_hosts;  /// Hosts that are not currently running
     size_t num_hosts_finished = 0;
 
     /// Save the first detected error and throw it at the end of execution
@@ -237,7 +236,10 @@ private:
     Int64 timeout_seconds = 120;
     bool is_replicated_database = false;
     bool throw_on_timeout = true;
+    bool only_running_hosts = false;
+
     bool timeout_exceeded = false;
+    bool stop_waiting_offline_hosts = false;
 };
 
 
@@ -316,6 +318,8 @@ DDLQueryStatusSource::DDLQueryStatusSource(
     {
         waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end());
         is_replicated_database = true;
+        only_running_hosts = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE ||
+                            output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE;
     }
     else
     {
@@ -377,6 +381,38 @@ Chunk DDLQueryStatusSource::generateChunkWithUnfinishedHosts() const
     return Chunk(std::move(columns), unfinished_hosts.size());
 }
 
+static NameSet getOfflineHosts(const String & node_path, const NameSet & hosts_to_wait, const ZooKeeperPtr & zookeeper, Poco::Logger * log)
+{ /// Returns the hosts from hosts_to_wait whose "<replicas_path>/<host>/active" node is missing, or an empty set if that would be all of them.
+    fs::path replicas_path; /// The "replicas" directory located two levels above node_path
+    if (node_path.ends_with('/')) /// With a trailing slash the first parent_path() only drops that slash, hence one extra call
+        replicas_path = fs::path(node_path).parent_path().parent_path().parent_path() / "replicas";
+    else
+        replicas_path = fs::path(node_path).parent_path().parent_path() / "replicas";
+
+    Strings paths; /// One "<replicas_path>/<host>/active" path per host
+    Strings hosts_array; /// The same hosts in the same order, so that res[i] can be mapped back to a host name
+    for (const auto & host : hosts_to_wait)
+    {
+        hosts_array.push_back(host);
+        paths.push_back(replicas_path / host / "active");
+    }
+
+    NameSet offline; /// Hosts whose "active" path was answered with ZNONODE
+    auto res = zookeeper->tryGet(paths); /// Single batched request, one result per path
+    for (size_t i = 0; i < res.size(); ++i)
+        if (res[i].error == Coordination::Error::ZNONODE) /// Any other error code does not mark the host as offline
+            offline.insert(hosts_array[i]);
+
+    if (offline.size() == hosts_to_wait.size())
+    {
+        /// Avoid reporting that all hosts are offline: return an empty set instead, so that no host is treated as offline
+        LOG_WARNING(log, "Did not find active hosts, will wait for all {} hosts. This should not happen often", offline.size());
+        return {};
+    }
+
+    return offline;
+}
+
 Chunk DDLQueryStatusSource::generate()
 {
     bool all_hosts_finished = num_hosts_finished >= waiting_hosts.size();
@@ -398,7 +434,7 @@ Chunk DDLQueryStatusSource::generate()
         if (isCancelled())
             return {};
 
-        if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
+        if (stop_waiting_offline_hosts || (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds))
         {
             timeout_exceeded = true;
 
@@ -406,7 +442,7 @@ Chunk DDLQueryStatusSource::generate()
             size_t num_active_hosts = current_active_hosts.size();
 
             constexpr auto msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. "
-                                                "There are {} unfinished hosts ({} of them are currently active), "
+                                                "There are {} unfinished hosts ({} of them are currently executing the task), "
                                                 "they are going to execute the query in background";
             if (throw_on_timeout)
             {
@@ -425,10 +461,7 @@ Chunk DDLQueryStatusSource::generate()
             return generateChunkWithUnfinishedHosts();
         }
 
-        if (num_hosts_finished != 0 || try_number != 0)
-        {
-            sleepForMilliseconds(std::min<size_t>(1000, 50 * (try_number + 1)));
-        }
+        sleepForMilliseconds(std::min<size_t>(1000, 50 * try_number));
 
         bool node_exists = false;
         Strings tmp_hosts;
@@ -440,9 +473,21 @@ Chunk DDLQueryStatusSource::generate()
             retries_ctl.retryLoop([&]()
             {
                 auto zookeeper = context->getZooKeeper();
-                node_exists = zookeeper->exists(node_path);
-                tmp_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / node_to_wait);
-                tmp_active_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / "active");
+                Strings paths = {String(fs::path(node_path) / node_to_wait), String(fs::path(node_path) / "active")};
+                auto res = zookeeper->tryGetChildren(paths);
+                for (size_t i = 0; i < res.size(); ++i)
+                    if (res[i].error != Coordination::Error::ZOK && res[i].error != Coordination::Error::ZNONODE)
+                        throw Coordination::Exception::fromPath(res[i].error, paths[i]);
+
+                if (res[0].error == Coordination::Error::ZNONODE)
+                    node_exists = zookeeper->exists(node_path);
+                else
+                    node_exists = true;
+                tmp_hosts = res[0].names;
+                tmp_active_hosts = res[1].names;
+
+                if (only_running_hosts)
+                    offline_hosts = getOfflineHosts(node_path, waiting_hosts, zookeeper, log);
             });
         }
 
@@ -460,6 +505,17 @@ Chunk DDLQueryStatusSource::generate()
 
         Strings new_hosts = getNewAndUpdate(tmp_hosts);
         ++try_number;
+
+        if (only_running_hosts)
+        {
+            size_t num_finished_or_offline = 0;
+            for (const auto & host : waiting_hosts)
+                num_finished_or_offline += finished_hosts.contains(host) || offline_hosts.contains(host);
+
+            if (num_finished_or_offline == waiting_hosts.size())
+                stop_waiting_offline_hosts = true;
+        }
+
         if (new_hosts.empty())
             continue;
 
@@ -470,7 +526,13 @@ Chunk DDLQueryStatusSource::generate()
         {
             ExecutionStatus status(-1, "Cannot obtain error message");
 
-            if (node_to_wait == "finished")
+            /// Replicated database retries in case of error, it should not write error status.
+#ifdef ABORT_ON_LOGICAL_ERROR
+            bool need_check_status = true;
+#else
+            bool need_check_status = !is_replicated_database;
+#endif
+            if (need_check_status)
             {
                 String status_data;
                 bool finished_exists = false;
@@ -496,7 +558,6 @@ Chunk DDLQueryStatusSource::generate()
             if (status.code != 0 && !first_exception
                 && context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
             {
-                /// Replicated database retries in case of error, it should not write error status.
                 if (is_replicated_database)
                     throw Exception(ErrorCodes::LOGICAL_ERROR, "There was an error on {}: {} (probably it's a bug)", host_id, status.message);
 
@@ -555,15 +616,6 @@ IProcessor::Status DDLQueryStatusSource::prepare()
         return ISource::prepare();
 }
 
-Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
-{
-    Strings res;
-    Coordination::Error code = zookeeper->tryGetChildren(node_path, res);
-    if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE)
-        throw Coordination::Exception::fromPath(code, node_path);
-    return res;
-}
-
 Strings DDLQueryStatusSource::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
 {
     Strings diff;
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 3ced82ebb57..1fc3fe37044 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -507,7 +507,7 @@ def test_alters_from_different_replicas(started_cluster):
 
     settings = {"distributed_ddl_task_timeout": 5}
     assert (
-        "There are 1 unfinished hosts (0 of them are currently active)"
+        "There are 1 unfinished hosts (0 of them are currently executing the task"
         in competing_node.query_and_get_error(
             "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;",
             settings=settings,
diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py
index b14581c1fe6..647626d8014 100644
--- a/tests/integration/test_replicated_database_cluster_groups/test.py
+++ b/tests/integration/test_replicated_database_cluster_groups/test.py
@@ -96,7 +96,7 @@ def test_cluster_groups(started_cluster):
     main_node_2.stop_clickhouse()
     settings = {"distributed_ddl_task_timeout": 5}
     assert (
-        "There are 1 unfinished hosts (0 of them are currently active)"
+        "There are 1 unfinished hosts (0 of them are currently executing the task)"
         in main_node_1.query_and_get_error(
             "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);",
             settings=settings,
diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference
index 39979a98bde..b9a66a1e1a9 100644
--- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference
+++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference
@@ -3,7 +3,7 @@ Received exception from server:
 Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.none already exists. (TABLE_ALREADY_EXISTS)
 (query: create table none on cluster test_shard_localhost (n int) engine=Memory;)
 Received exception from server:
-Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
+Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
 (query: drop table if exists none on cluster test_unavailable_shard;)
 throw
 localhost	9000	0		0	0
@@ -12,7 +12,7 @@ Code: 57. Error: Received from localhost:9000. Error: There was an error on [loc
 (query: create table throw on cluster test_shard_localhost (n int) engine=Memory format Null;)
 localhost	9000	0		1	0
 Received exception from server:
-Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
+Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
 (query: drop table if exists throw on cluster test_unavailable_shard;)
 null_status_on_timeout
 localhost	9000	0		0	0
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference
index f2b41569540..8ad9008057f 100644
--- a/tests/queries/0_stateless/02447_drop_database_replica.reference
+++ b/tests/queries/0_stateless/02447_drop_database_replica.reference
@@ -12,10 +12,18 @@ t
 2
 rdb_default	1	1	s1	r1	1
 2
+s1	r1	OK	2	0
+s2	r1	QUEUED	2	0
+s1	r2	QUEUED	2	0
+s1	r1	OK	2	0
+s2	r1	QUEUED	2	0
+s1	r2	QUEUED	2	0
 2
 rdb_default	1	1	s1	r1	1
 rdb_default	1	2	s1	r2	0
 2
 2
 t
+t2
+t3
 rdb_default_4	1	1	s1	r1	1
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh
index d5b3ceef46a..388af3fad74 100755
--- a/tests/queries/0_stateless/02447_drop_database_replica.sh
+++ b/tests/queries/0_stateless/02447_drop_database_replica.sh
@@ -32,6 +32,9 @@ $CLICKHOUSE_CLIENT -q "system sync database replica $db"
 $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' and shard_num=1 and replica_num=1"
 $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
 
+$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log"
+$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log"
+
 $CLICKHOUSE_CLIENT -q "detach database $db3"
 $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"
 $CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null

From c7fa93d704805a0432428cb59ce3cf85d2f77f1b Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 11 Dec 2023 14:38:59 +0100
Subject: [PATCH 076/204] Add infrastructure for testing replicated MergeTree
 queue

- replicated_queue_fail_next_entry - to fail next queue entry
- replicated_queue_unfail_entries - to "unfail" all queue entries (if
  any)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Common/ErrorCodes.cpp                     |  1 +
 src/Common/FailPoint.cpp                      |  2 ++
 .../MergeTree/ReplicatedMergeTreeLogEntry.h   |  3 +++
 src/Storages/StorageReplicatedMergeTree.cpp   | 19 +++++++++++++++++++
 4 files changed, 25 insertions(+)

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 9222a27afdf..577a83e40b9 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -589,6 +589,7 @@
     M(707, GCP_ERROR) \
     M(708, ILLEGAL_STATISTIC) \
     M(709, CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT) \
+    M(710, FAULT_INJECTED) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp
index 9665788dac2..f29aee0cdcc 100644
--- a/src/Common/FailPoint.cpp
+++ b/src/Common/FailPoint.cpp
@@ -34,6 +34,8 @@ static struct InitFiu
 
 #define APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE) \
     ONCE(replicated_merge_tree_commit_zk_fail_after_op) \
+    ONCE(replicated_queue_fail_next_entry) \
+    REGULAR(replicated_queue_unfail_entries) \
     ONCE(replicated_merge_tree_insert_quorum_fail_0) \
     REGULAR(replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault) \
     REGULAR(use_delayed_remote_source) \
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
index 4821a80a29b..b3ab3d75dcb 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
@@ -171,6 +171,9 @@ struct ReplicatedMergeTreeLogEntryData
     /// The quorum value (for GET_PART) is a non-zero value when the quorum write is enabled.
     size_t quorum = 0;
 
+    /// Used only in tests for permanent fault injection for particular queue entry.
+    bool fault_injected = false;
+
     /// If this MUTATE_PART entry caused by alter(modify/drop) query.
     bool isAlterMutation() const
     {
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index f143a2ec78b..a68294d3dce 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -18,6 +18,7 @@
 #include <Common/thread_local_rng.h>
 #include <Common/typeid_cast.h>
 #include <Common/ThreadFuzzer.h>
+#include <Common/FailPoint.h>
 
 #include <Core/ServerUUID.h>
 
@@ -147,6 +148,12 @@ namespace CurrentMetrics
 namespace DB
 {
 
+namespace FailPoints
+{
+    extern const char replicated_queue_fail_next_entry[];
+    extern const char replicated_queue_unfail_entries[];
+}
+
 namespace ErrorCodes
 {
     extern const int CANNOT_READ_ALL_DATA;
@@ -191,6 +198,7 @@ namespace ErrorCodes
     extern const int TABLE_IS_DROPPED;
     extern const int CANNOT_BACKUP_TABLE;
     extern const int SUPPORT_IS_DISABLED;
+    extern const int FAULT_INJECTED;
 }
 
 namespace ActionLocks
@@ -1931,6 +1939,17 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo
 
 bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry)
 {
+    fiu_do_on(FailPoints::replicated_queue_fail_next_entry,
+    {
+        entry.fault_injected = true;
+    });
+    fiu_do_on(FailPoints::replicated_queue_unfail_entries,
+    {
+        entry.fault_injected = false;
+    });
+    if (entry.fault_injected)
+        throw Exception(ErrorCodes::FAULT_INJECTED, "Injecting fault for log entry {}", entry.getDescriptionForLogs(format_version));
+
     if (entry.type == LogEntry::DROP_RANGE || entry.type == LogEntry::DROP_PART)
     {
         executeDropRange(entry);

From 7efe41357598d007f74d7ecc8b61bebbd1a6cb18 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 11 Dec 2023 17:04:12 +0100
Subject: [PATCH 077/204] Add a test for ALTER_METADATA vs MERGE_PARTS race
 (CHECKSUM_DOESNT_MATCH)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 ...metadata_merge_checksum_mismatch.reference |  1 +
 ..._alter_metadata_merge_checksum_mismatch.sh | 98 +++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
 create mode 100755 tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh

diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
new file mode 100644
index 00000000000..0045aab2e30
--- /dev/null
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
@@ -0,0 +1 @@
+all_0_2_2_1	RegularMerge	MergeParts	CHECKSUM_DOESNT_MATCH
diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
new file mode 100755
index 00000000000..20cffcd9f65
--- /dev/null
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+# Tags: no-parallel
+# Tag no-parallel: failpoint is in use
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+set -e
+
+function wait_part()
+{
+    local table=$1 && shift
+    local part=$1 && shift
+
+    for ((i = 0; i < 100; ++i)); do
+        if [[ $($CLICKHOUSE_CLIENT -q "select count() from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and active and name = '$part'") -eq 1 ]]; then
+            return
+        fi
+        sleep 0.1
+    done
+
+    echo "Part $table::$part does not appeared" >&2
+}
+
+function restore_failpoints()
+{
+    # restore entry error with failpoints (to avoid endless errors in logs)
+    $CLICKHOUSE_CLIENT -nm -q "
+        system enable failpoint replicated_queue_unfail_entries;
+        system sync replica $failed_replica;
+        system disable failpoint replicated_queue_unfail_entries;
+    "
+}
+trap restore_failpoints EXIT
+
+$CLICKHOUSE_CLIENT -nm -q "
+    drop table if exists data_r1;
+    drop table if exists data_r2;
+
+    create table data_r1 (key Int, value Int, index value_idx value type minmax) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/data', '{table}') order by key;
+    create table data_r2 (key Int, value Int, index value_idx value type minmax) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/data', '{table}') order by key;
+
+    insert into data_r1 (key) values (1); -- part all_0_0_0
+"
+
+# will fail ALTER_METADATA on one of the replicas
+$CLICKHOUSE_CLIENT -nm -q "
+    system enable failpoint replicated_queue_fail_next_entry;
+    alter table data_r1 drop index value_idx settings alter_sync=0; -- part all_0_0_0_1
+
+    system sync replica data_r1 pull;
+    system sync replica data_r2 pull;
+"
+
+# replica on which ALTER_METADATA has succeeded
+success_replica=
+for ((i = 0; i < 100; ++i)); do
+    for table in data_r1 data_r2; do
+        mutations="$($CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = '$CLICKHOUSE_DATABASE' and table = '$table' and is_done = 0")"
+        if [[ $mutations -eq 0 ]]; then
+            success_replica=$table
+        fi
+    done
+    if [[ -n $success_replica ]]; then
+        break
+    fi
+    sleep 0.1
+done
+case "$success_replica" in
+    data_r1) failed_replica=data_r2;;
+    data_r2) failed_replica=data_r1;;
+    *) echo "ALTER_METADATA does not succeed on any replica" >&2 && exit 1;;
+esac
+mutations_on_failed_replica="$($CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = '$CLICKHOUSE_DATABASE' and table = '$failed_replica' and is_done = 0")"
+if [[ $mutations_on_failed_replica != 1 ]]; then
+    echo "Wrong number of mutations on failed replica $failed_replica, mutations $mutations_on_failed_replica" >&2
+fi
+
+# This will create MERGE_PARTS, on failed replica it will be fetched from source replica (since it does not have all parts to execute merge)
+$CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1" # part all_0_0_1_1
+
+$CLICKHOUSE_CLIENT -nm -q "
+    insert into $success_replica (key) values (2); -- part all_2_2_0
+    optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1; -- part all_0_2_2_1
+    system sync replica $failed_replica pull;
+"
+
+# Wait for part to be merged on failed replica, that will trigger CHECKSUM_DOESNT_MATCH
+wait_part "$failed_replica" all_0_2_2_1
+
+# As soon as the part is fetched there will already be a CHECKSUM_DOESNT_MATCH error in case of ALTER_METADATA re-ordering, but let's restore the failpoints and sync the failed replica first.
+restore_failpoints
+trap '' EXIT
+
+$CLICKHOUSE_CLIENT -q "system flush logs"
+# check for error "Different number of files: 5 compressed (expected 3) and 2 uncompressed ones (expected 2). (CHECKSUM_DOESNT_MATCH)"
+$CLICKHOUSE_CLIENT -q "select part_name, merge_reason, event_type, errorCodeToName(error) from system.part_log where database = '$CLICKHOUSE_DATABASE' and error != 0 order by event_time_microseconds"

From a12df35be4c6954e683dbea53c00599ca6a96d5d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 11 Dec 2023 17:47:22 +0100
Subject: [PATCH 078/204] Eliminate possible race between ALTER_METADATA and
 MERGE_PARTS

v2: move the metadata version check after the check that the part is not a covering part
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/MergeTree/MergeFromLogEntryTask.cpp | 16 ++++++++++++++--
 src/Storages/StorageReplicatedMergeTree.cpp      |  6 ++----
 ...er_metadata_merge_checksum_mismatch.reference |  1 -
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
index 9be31859a19..3f0b8c8b247 100644
--- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
+++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
@@ -43,6 +43,8 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
     LOG_TRACE(log, "Executing log entry to merge parts {} to {}",
         fmt::join(entry.source_parts, ", "), entry.new_part_name);
 
+    StorageMetadataPtr metadata_snapshot = storage.getInMemoryMetadataPtr();
+    int32_t metadata_version = metadata_snapshot->getMetadataVersion();
     const auto storage_settings_ptr = storage.getSettings();
 
     if (storage_settings_ptr->always_fetch_merged_part)
@@ -129,6 +131,18 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
             };
         }
 
+        int32_t part_metadata_version = source_part_or_covering->getMetadataVersion();
+        if (part_metadata_version > metadata_version)
+        {
+            LOG_DEBUG(log, "Source part metadata version {} is newer then the table metadata version {}. ALTER_METADATA is still in progress.",
+                part_metadata_version, metadata_version);
+            return PrepareResult{
+                .prepared_successfully = false,
+                .need_to_check_missing_part_in_fetch = false,
+                .part_log_writer = {}
+            };
+        }
+
         parts.push_back(source_part_or_covering);
     }
 
@@ -176,8 +190,6 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
     /// It will live until the whole task is being destroyed
     table_lock_holder = storage.lockForShare(RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations);
 
-    StorageMetadataPtr metadata_snapshot = storage.getInMemoryMetadataPtr();
-
     auto future_merged_part = std::make_shared<FutureMergedMutatedPart>(parts, entry.new_part_format);
     if (future_merged_part->name != entry.new_part_name)
     {
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index a68294d3dce..5233393a11f 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1745,14 +1745,12 @@ bool StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(
 
         if (replica_part_header.getColumnsHash() != local_part_header.getColumnsHash())
         {
-            /// Currently there are two (known) cases when it may happen:
+            /// Currently there is only one (known) case when it may happen:
             ///  - KILL MUTATION query had removed mutation before all replicas have executed assigned MUTATE_PART entries.
             ///    Some replicas may skip this mutation and update part version without actually applying any changes.
             ///    It leads to mismatching checksum if changes were applied on other replicas.
-            ///  - ALTER_METADATA and MERGE_PARTS were reordered on some replicas.
-            ///    It may lead to different number of columns in merged parts on these replicas.
             throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Part {} from {} has different columns hash "
-                            "(it may rarely happen on race condition with KILL MUTATION or ALTER COLUMN).", part_name, replica);
+                            "(it may rarely happen on race condition with KILL MUTATION).", part_name, replica);
         }
 
         replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
index 0045aab2e30..e69de29bb2d 100644
--- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
@@ -1 +0,0 @@
-all_0_2_2_1	RegularMerge	MergeParts	CHECKSUM_DOESNT_MATCH

From 5521e5d9b16a7527d81cf97742c548570769d143 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 29 Dec 2023 15:58:01 +0000
Subject: [PATCH 079/204] Refactor StorageHDFS and StorageFile virtual columns
 filtering

---
 src/Storages/HDFS/StorageHDFS.cpp   | 185 ++++++++++++++++++++++++----
 src/Storages/HDFS/StorageHDFS.h     |  12 +-
 src/Storages/StorageFile.cpp        | 158 ++++++++++++++++++++----
 src/Storages/StorageFile.h          |  16 ++-
 src/Storages/VirtualColumnUtils.cpp |  36 ++++++
 src/Storages/VirtualColumnUtils.h   |  19 +++
 6 files changed, 369 insertions(+), 57 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index fdbb5e9f171..9d719413c8d 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -15,6 +15,8 @@
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <IO/WriteHelpers.h>
 #include <IO/CompressionMethod.h>
@@ -408,6 +410,35 @@ ColumnsDescription StorageHDFS::getTableStructureFromData(
 class HDFSSource::DisclosedGlobIterator::Impl
 {
 public:
+
+    Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    {
+        const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
+        uris = getPathsList(path_from_uri, uri_without_path, context);
+        ActionsDAGPtr filter_dag;
+        if (!uris.empty())
+             filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, uris[0].path);
+
+        if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(uris.size());
+            for (const auto & path_with_info : uris)
+                paths.push_back(path_with_info.path);
+
+            VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context);
+        }
+        auto file_progress_callback = context->getFileProgressCallback();
+
+        for (auto & elem : uris)
+        {
+            elem.path = uri_without_path + elem.path;
+            if (file_progress_callback && elem.info)
+                file_progress_callback(FileProgress(0, elem.info->size));
+        }
+        uris_iter = uris.begin();
+    }
+
     Impl(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     {
         const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
@@ -456,21 +487,21 @@ private:
 class HDFSSource::URISIterator::Impl : WithContext
 {
 public:
-    explicit Impl(const std::vector<String> & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context_)
+    explicit Impl(const std::vector<String> & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_)
         : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback())
     {
-        ASTPtr filter_ast;
+        ActionsDAGPtr filter_dag;
         if (!uris.empty())
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, getPathFromUriAndUriWithoutPath(uris[0]).first, getContext());
+            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, getPathFromUriAndUriWithoutPath(uris[0]).first);
 
-        if (filter_ast)
+        if (filter_dag)
         {
             std::vector<String> paths;
             paths.reserve(uris.size());
             for (const auto & uri : uris)
                 paths.push_back(getPathFromUriAndUriWithoutPath(uri).first);
 
-            VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, getContext(), filter_ast);
+            VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext());
         }
 
         if (!uris.empty())
@@ -520,13 +551,16 @@ private:
 HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, query, virtual_columns, context)) {}
 
+HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, predicate, virtual_columns, context)) {}
+
 StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next()
 {
     return pimpl->next();
 }
 
-HDFSSource::URISIterator::URISIterator(const std::vector<String> & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    : pimpl(std::make_shared<HDFSSource::URISIterator::Impl>(uris_, query, virtual_columns, context))
+HDFSSource::URISIterator::URISIterator(const std::vector<String> & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    : pimpl(std::make_shared<HDFSSource::URISIterator::Impl>(uris_, predicate, virtual_columns, context))
 {
 }
 
@@ -541,8 +575,8 @@ HDFSSource::HDFSSource(
     ContextPtr context_,
     UInt64 max_block_size_,
     std::shared_ptr<IteratorWrapper> file_iterator_,
-    bool need_only_count_,
-    const SelectQueryInfo & query_info_)
+    bool need_only_count_)
+    //const SelectQueryInfo & query_info_)
     : ISource(info.source_header, false)
     , WithContext(context_)
     , storage(std::move(storage_))
@@ -553,7 +587,7 @@ HDFSSource::HDFSSource(
     , file_iterator(file_iterator_)
     , columns_description(info.columns_description)
     , need_only_count(need_only_count_)
-    , query_info(query_info_)
+    //, query_info(query_info_)
 {
     initialize();
 }
@@ -843,7 +877,82 @@ bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const
     return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_);
 }
 
-Pipe StorageHDFS::read(
+class ReadFromHDFS : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromHDFS"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    ReadFromHDFS(
+        Block sample_block,
+        std::vector<String> uris_,
+        bool distributed_processing_,
+        NamesAndTypesList virtual_columns_,
+        bool is_path_with_globs_,
+        ReadFromFormatInfo info_,
+        bool need_only_count_,
+        std::shared_ptr<StorageHDFS> storage_,
+        // StorageSnapshotPtr storage_snapshot_,
+        // const StorageEmbeddedRocksDB & storage_,
+        // SelectQueryInfo query_info_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , uris(std::move(uris_))
+        , distributed_processing(distributed_processing_)
+        , virtual_columns(std::move(virtual_columns_))
+        , is_path_with_globs(is_path_with_globs_)
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , storage(std::move(storage_))
+        // , storage_snapshot(std::move(storage_snapshot_))
+        // , storage(storage_)
+        // , query_info(std::move(query_info_))
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::vector<String> uris;
+    const bool distributed_processing;
+    NamesAndTypesList virtual_columns;
+    bool is_path_with_globs;
+    ReadFromFormatInfo info;
+    const bool need_only_count;
+    std::shared_ptr<StorageHDFS> storage;
+
+    // StorageSnapshotPtr storage_snapshot;
+    // const StorageEmbeddedRocksDB & storage;
+    // SelectQueryInfo query_info;
+    ContextPtr context;
+
+    size_t max_block_size;
+    size_t num_streams;
+
+    std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper;
+
+    // FieldVectorPtr keys;
+    // bool all_scan = false;
+
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+void ReadFromHDFS::applyFilters()
+{
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter_actions_dag)
+        predicate = filter_actions_dag->getOutputs().at(0);
+
+    createIterator(predicate);
+}
+
+void StorageHDFS::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -852,18 +961,44 @@ Pipe StorageHDFS::read(
     size_t max_block_size,
     size_t num_streams)
 {
-    std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper{nullptr};
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns);
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && context_->getSettingsRef().optimize_count_from_files;
+
+    auto this_ptr = std::static_pointer_cast<StorageHDFS>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromHDFS>(
+        read_from_format_info.source_header,
+        uris,
+        distributed_processing,
+        virtual_columns,
+        is_path_with_globs,
+        std::move(read_from_format_info),
+        need_only_count,
+        std::move(this_ptr),
+        context_,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (iterator_wrapper)
+        return;
+
     if (distributed_processing)
     {
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>(
-            [callback = context_->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo {
+            [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo {
                 return StorageHDFS::PathWithInfo{callback(), std::nullopt};
         });
     }
     else if (is_path_with_globs)
     {
         /// Iterate through disclosed globs and make a source for each file
-        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uris[0], query_info.query, virtual_columns, context_);
+        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uris[0], predicate, virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([glob_iterator]()
         {
             return glob_iterator->next();
@@ -871,31 +1006,31 @@ Pipe StorageHDFS::read(
     }
     else
     {
-        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(uris, query_info.query, virtual_columns, context_);
+        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(uris, predicate, virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([uris_iterator]()
         {
             return uris_iterator->next();
         });
     }
+}
 
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), getVirtuals());
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && context_->getSettingsRef().optimize_count_from_files;
+void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createIterator(nullptr);
 
     Pipes pipes;
-    auto this_ptr = std::static_pointer_cast<StorageHDFS>(shared_from_this());
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<HDFSSource>(
-            read_from_format_info,
-            this_ptr,
-            context_,
+            info,
+            storage,
+            context,
             max_block_size,
             iterator_wrapper,
-            need_only_count,
-            query_info));
+            need_only_count)); //,
+            //query_info));
     }
-    return Pipe::unitePipes(std::move(pipes));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/)
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index 18eeb787d77..cee1b674eb7 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -51,7 +51,8 @@ public:
 
     String getName() const override { return "HDFS"; }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -115,6 +116,7 @@ public:
     {
         public:
             DisclosedGlobIterator(const String & uri_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+            DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             StorageHDFS::PathWithInfo next();
         private:
             class Impl;
@@ -125,7 +127,7 @@ public:
     class URISIterator
     {
         public:
-            URISIterator(const std::vector<String> & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+            URISIterator(const std::vector<String> & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             StorageHDFS::PathWithInfo next();
         private:
             class Impl;
@@ -142,8 +144,8 @@ public:
         ContextPtr context_,
         UInt64 max_block_size_,
         std::shared_ptr<IteratorWrapper> file_iterator_,
-        bool need_only_count_,
-        const SelectQueryInfo & query_info_);
+        bool need_only_count_);
+        //const SelectQueryInfo & query_info_);
 
     String getName() const override;
 
@@ -162,7 +164,7 @@ private:
     ColumnsDescription columns_description;
     bool need_only_count;
     size_t total_rows_in_file = 0;
-    SelectQueryInfo query_info;
+    //SelectQueryInfo query_info;
 
     std::unique_ptr<ReadBuffer> read_buf;
     std::shared_ptr<IInputFormat> input_format;
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 25bb6691ff6..b040f452410 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -9,6 +9,7 @@
 
 #include <Interpreters/Context.h>
 #include <Interpreters/evaluateConstantExpression.h>
+#include <Interpreters/InterpreterSelectQuery.h>
 
 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/ASTIdentifier_fwd.h>
@@ -44,6 +45,8 @@
 #include <Common/filesystemHelpers.h>
 #include <Common/logger_useful.h>
 #include <Common/ProfileEvents.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <QueryPipeline/Pipe.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
@@ -947,6 +950,23 @@ StorageFileSource::FilesIterator::FilesIterator(
         VirtualColumnUtils::filterByPathOrFile(files, files, query, virtual_columns, context_, filter_ast);
 }
 
+StorageFileSource::FilesIterator::FilesIterator(
+    const Strings & files_,
+    std::optional<StorageFile::ArchiveInfo> archive_info_,
+    const ActionsDAG::Node * predicate,
+    const NamesAndTypesList & virtual_columns,
+    ContextPtr context_,
+    bool distributed_processing_)
+    : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_)
+{
+    ActionsDAGPtr filter_dag;
+    if (!distributed_processing && !archive_info && !files.empty() && !files[0].empty())
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, files[0]);
+
+    if (filter_dag)
+        VirtualColumnUtils::filterByPathOrFile(files, files, filter_dag, virtual_columns, context_);
+}
+
 String StorageFileSource::FilesIterator::next()
 {
     if (distributed_processing)
@@ -974,16 +994,13 @@ const String & StorageFileSource::FilesIterator::getFileNameInArchive()
 StorageFileSource::StorageFileSource(
     const ReadFromFormatInfo & info,
     std::shared_ptr<StorageFile> storage_,
-    const StorageSnapshotPtr & storage_snapshot_,
     ContextPtr context_,
-    const SelectQueryInfo & query_info_,
     UInt64 max_block_size_,
     FilesIteratorPtr files_iterator_,
     std::unique_ptr<ReadBuffer> read_buf_,
     bool need_only_count_)
     : SourceWithKeyCondition(info.source_header, false)
     , storage(std::move(storage_))
-    , storage_snapshot(storage_snapshot_)
     , files_iterator(std::move(files_iterator_))
     , read_buf(std::move(read_buf_))
     , columns_description(info.columns_description)
@@ -991,7 +1008,6 @@ StorageFileSource::StorageFileSource(
     , requested_virtual_columns(info.requested_virtual_columns)
     , block_for_format(info.format_header)
     , context(context_)
-    , query_info(query_info_)
     , max_block_size(max_block_size_)
     , need_only_count(need_only_count_)
 {
@@ -1322,14 +1338,87 @@ std::optional<size_t> StorageFileSource::tryGetNumRowsFromCache(const String & p
     return schema_cache.tryGetNumRows(key, get_last_mod_time);
 }
 
-Pipe StorageFile::read(
+class ReadFromFile : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromFile"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    ReadFromFile(
+        Block sample_block,
+        std::shared_ptr<StorageFile> storage_,
+        std::vector<std::string> paths_,
+        std::optional<StorageFile::ArchiveInfo> archive_info_,
+        NamesAndTypesList virtual_columns_,
+        bool distributed_processing_,
+        ReadFromFormatInfo info_,
+        const bool need_only_count_,
+        size_t total_bytes_to_read_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , paths(std::move(paths_))
+        , archive_info(std::move(archive_info_))
+        , virtual_columns(std::move(virtual_columns_))
+        , distributed_processing(distributed_processing_)
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , total_bytes_to_read(total_bytes_to_read_)
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , max_num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::shared_ptr<StorageFile> storage;
+
+    std::vector<std::string> paths;
+    std::optional<StorageFile::ArchiveInfo> archive_info;
+
+    NamesAndTypesList virtual_columns;
+    const bool distributed_processing;
+
+    ReadFromFormatInfo info;
+    const bool need_only_count;
+
+    size_t total_bytes_to_read;
+
+    ContextPtr context;
+
+    size_t max_block_size;
+    const size_t max_num_streams;
+
+    std::shared_ptr<StorageFileSource::FilesIterator> files_iterator;
+
+    // FieldVectorPtr keys;
+    // bool all_scan = false;
+
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+void ReadFromFile::applyFilters()
+{
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter_actions_dag)
+        predicate = filter_actions_dag->getOutputs().at(0);
+
+    createIterator(predicate);
+}
+
+void StorageFile::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
     ContextPtr context,
     QueryProcessingStage::Enum /*processed_stage*/,
     size_t max_block_size,
-    const size_t max_num_streams)
+    size_t num_streams)
 {
     if (use_table_fd)
     {
@@ -1346,17 +1435,48 @@ Pipe StorageFile::read(
 
         if (p->size() == 1 && !fs::exists(p->at(0)))
         {
-            if (context->getSettingsRef().engine_file_empty_if_not_exists)
-                return Pipe(std::make_shared<NullSource>(storage_snapshot->getSampleBlockForColumns(column_names)));
-            else
+            if (!context->getSettingsRef().engine_file_empty_if_not_exists)
                 throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p->at(0));
+
+            auto header = storage_snapshot->getSampleBlockForColumns(column_names);
+            InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, context);
+            return;
         }
     }
 
-    auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, archive_info, query_info.query, virtual_columns, context, distributed_processing);
-
     auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());
 
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals());
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && context->getSettingsRef().optimize_count_from_files;
+
+    auto reading = std::make_unique<ReadFromFile>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        paths,
+        archive_info,
+        virtual_columns,
+        distributed_processing,
+        std::move(read_from_format_info),
+        need_only_count,
+        total_bytes_to_read,
+        context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromFile::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (files_iterator)
+        return;
+
+    files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, archive_info, predicate, virtual_columns, context, distributed_processing);
+}
+
+void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
     size_t num_streams = max_num_streams;
 
     size_t files_to_read = 0;
@@ -1377,10 +1497,6 @@ Pipe StorageFile::read(
     if (progress_callback && !archive_info)
         progress_callback(FileProgress(0, total_bytes_to_read));
 
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals());
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && context->getSettingsRef().optimize_count_from_files;
-
     for (size_t i = 0; i < num_streams; ++i)
     {
         /// In case of reading from fd we have to check whether we have already created
@@ -1388,22 +1504,20 @@ Pipe StorageFile::read(
         /// If yes, then we should use it in StorageFileSource. Atomic bool flag is needed
         /// to prevent data race in case of parallel reads.
         std::unique_ptr<ReadBuffer> read_buffer;
-        if (has_peekable_read_buffer_from_fd.exchange(false))
-            read_buffer = std::move(peekable_read_buffer_from_fd);
+        if (storage->has_peekable_read_buffer_from_fd.exchange(false))
+            read_buffer = std::move(storage->peekable_read_buffer_from_fd);
 
         pipes.emplace_back(std::make_shared<StorageFileSource>(
-            read_from_format_info,
-            this_ptr,
-            storage_snapshot,
+            info,
+            storage,
             context,
-            query_info,
             max_block_size,
             files_iterator,
             std::move(read_buffer),
             need_only_count));
     }
 
-    return Pipe::unitePipes(std::move(pipes));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index 1fd3f2e0edf..ecb9e01b862 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -53,7 +53,8 @@ public:
 
     std::string getName() const override { return "File"; }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -137,6 +138,7 @@ public:
 protected:
     friend class StorageFileSource;
     friend class StorageFileSink;
+    friend class ReadFromFile;
 
 private:
     void setStorageMetadata(CommonArguments args);
@@ -199,6 +201,14 @@ public:
             ContextPtr context_,
             bool distributed_processing_ = false);
 
+        explicit FilesIterator(
+            const Strings & files_,
+            std::optional<StorageFile::ArchiveInfo> archive_info_,
+            const ActionsDAG::Node * predicate,
+            const NamesAndTypesList & virtual_columns,
+            ContextPtr context_,
+            bool distributed_processing_ = false);
+
         String next();
 
         bool isReadFromArchive() const
@@ -234,9 +244,7 @@ private:
     StorageFileSource(
         const ReadFromFormatInfo & info,
         std::shared_ptr<StorageFile> storage_,
-        const StorageSnapshotPtr & storage_snapshot_,
         ContextPtr context_,
-        const SelectQueryInfo & query_info_,
         UInt64 max_block_size_,
         FilesIteratorPtr files_iterator_,
         std::unique_ptr<ReadBuffer> read_buf_,
@@ -269,7 +277,6 @@ private:
     std::optional<size_t> tryGetNumRowsFromCache(const String & path, time_t last_mod_time) const;
 
     std::shared_ptr<StorageFile> storage;
-    StorageSnapshotPtr storage_snapshot;
     FilesIteratorPtr files_iterator;
     String current_path;
     std::optional<size_t> current_file_size;
@@ -290,7 +297,6 @@ private:
     Block block_for_format;
 
     ContextPtr context;    /// TODO Untangle potential issues with context lifetime.
-    SelectQueryInfo query_info;
     UInt64 max_block_size;
 
     bool finished_generate = false;
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index aed06fb0540..7690e160255 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -390,6 +390,42 @@ static void addPathAndFileToVirtualColumns(Block & block, const String & path, s
     block.getByName("_idx").column->assumeMutableRef().insert(idx);
 }
 
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example)
+{
+    if (!predicate || virtual_columns.empty())
+        return {}; /// No predicate or no virtual columns: return an empty DAG pointer.
+
+    Block block;
+    for (const auto & column : virtual_columns)
+    {
+        if (column.name == "_file" || column.name == "_path") /// Only these two virtual columns are put into the block.
+            block.insert({column.type->createColumn(), column.type, column.name});
+    }
+    /// Append the "_idx" column (getFilterByPathAndFileIndexes reads it back as the filter result),
+    /// then add a single sample row built from path_example with index 0 so the block can be used to construct the filter.
+    block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
+    addPathAndFileToVirtualColumns(block, path_example, 0);
+    return splitFilterDagForAllowedInputs(predicate, block); /// Presumably keeps only the part of the predicate computable from this block's columns — verify against splitFilterDagForAllowedInputs.
+}
+
+ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+{
+    Block block;
+    for (const auto & column : virtual_columns)
+    {
+        if (column.name == "_file" || column.name == "_path") /// Same column selection as in createPathAndFileFilterDAG.
+            block.insert({column.type->createColumn(), column.type, column.name});
+    }
+    block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"}); /// Will hold, for each row, the position of its path in `paths`.
+
+    for (size_t i = 0; i != paths.size(); ++i)
+        addPathAndFileToVirtualColumns(block, paths[i], i); /// One row per path; `i` goes into "_idx".
+
+    filterBlockWithDAG(dag, block, context); /// Presumably removes the rows rejected by `dag` from `block` in place — verify against filterBlockWithDAG.
+
+    return block.getByName("_idx").column; /// The "_idx" values still present in the block after filtering.
+}
+
 ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context)
 {
     if (!query || virtual_columns.empty())
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
index e22b9742888..4f9636b4213 100644
--- a/src/Storages/VirtualColumnUtils.h
+++ b/src/Storages/VirtualColumnUtils.h
@@ -77,6 +77,25 @@ void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & pa
     sources = std::move(filtered_sources);
 }
 
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example);
+
+ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+
+template <typename T>
+void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+{
+    auto indexes_column = getFilterByPathAndFileIndexes(paths, dag, virtual_columns, context); /// Positions in `paths` that the filter keeps.
+    const auto & indexes = typeid_cast<const ColumnUInt64 &>(*indexes_column).getData();
+    if (indexes.size() == sources.size()) /// As many kept indexes as sources: treated as "nothing filtered out", `sources` is left untouched.
+        return;
+
+    std::vector<T> filtered_sources;
+    filtered_sources.reserve(indexes.size());
+    for (auto index : indexes)
+        filtered_sources.emplace_back(std::move(sources[index])); /// `index` addresses `sources`, so `sources` and `paths` are assumed to be parallel — TODO confirm at call sites.
+    sources = std::move(filtered_sources); /// Replace the input with the kept elements, in the order of `indexes`.
+}
+
 void addRequestedPathFileAndSizeVirtualsToChunk(
     Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional<size_t> size, const String * filename = nullptr);
 }

From db97a6998901aeb0a60f2a9cb57bcb98a75881e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 17:00:01 +0100
Subject: [PATCH 080/204] Add perf tests with tuples

---
 tests/performance/agg_functions_min_max_any.xml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/performance/agg_functions_min_max_any.xml b/tests/performance/agg_functions_min_max_any.xml
index 2926a5ed3c8..f8469244643 100644
--- a/tests/performance/agg_functions_min_max_any.xml
+++ b/tests/performance/agg_functions_min_max_any.xml
@@ -87,4 +87,9 @@
 <query>select any(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
 <query>select anyHeavy(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
 
+<!-- Test with tuples (useful when you want to keep 2 columns of the same row) -->
+<query>select min((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
+<query>select max((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
+<query>select any((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
+<query>select anyHeavy((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
 </test>

From a38b3b9a7945fcca64f42a230ac9df808790a70a Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 29 Dec 2023 17:02:02 +0100
Subject: [PATCH 081/204] Fix test

---
 ..._materialized_view_with_dropped_target_table_no_exception.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
index 744b2578617..af6dbf24473 100644
--- a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
+++ b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
@@ -1,3 +1,4 @@
+set ignore_materialized_views_with_dropped_target_table = 1;
 drop table if exists from_table;
 drop table if exists to_table;
 drop table if exists mv;

From 4b7fcfbc75d5ffe5d4331f2370d43537e504bc44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 14:56:08 +0100
Subject: [PATCH 082/204] Use iota in more places

---
 src/AggregateFunctions/QuantilesCommon.h |  4 ++--
 src/Columns/IColumnImpl.h                | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/AggregateFunctions/QuantilesCommon.h b/src/AggregateFunctions/QuantilesCommon.h
index 3dda0119485..afbca84b827 100644
--- a/src/AggregateFunctions/QuantilesCommon.h
+++ b/src/AggregateFunctions/QuantilesCommon.h
@@ -6,6 +6,7 @@
 
 #include <Common/FieldVisitorConvertToNumber.h>
 #include <Common/NaNUtils.h>
+#include <Common/iota.h>
 
 
 namespace DB
@@ -63,10 +64,9 @@ struct QuantileLevels
 
             if (isNaN(levels[i]) || levels[i] < 0 || levels[i] > 1)
                 throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Quantile level is out of range [0..1]");
-
-            permutation[i] = i;
         }
 
+        iota(permutation.data(), size, Permutation::value_type(0));
         ::sort(permutation.begin(), permutation.end(), [this] (size_t a, size_t b) { return levels[a] < levels[b]; });
     }
 };
diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h
index 0eab9452813..8e0bf0014f2 100644
--- a/src/Columns/IColumnImpl.h
+++ b/src/Columns/IColumnImpl.h
@@ -6,10 +6,11 @@
   * implementation.
   */
 
-#include <Columns/IColumn.h>
-#include <Common/PODArray.h>
-#include <base/sort.h>
 #include <algorithm>
+#include <Columns/IColumn.h>
+#include <base/sort.h>
+#include <Common/PODArray.h>
+#include <Common/iota.h>
 
 
 namespace DB
@@ -299,8 +300,7 @@ void IColumn::getPermutationImpl(
     if (limit >= data_size)
         limit = 0;
 
-    for (size_t i = 0; i < data_size; ++i)
-        res[i] = i;
+    iota(res.data(), data_size, Permutation::value_type(0));
 
     if (limit)
     {

From ed6b9703a1a4848949f6e6f37a241a0cffb17c96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 15:57:36 +0100
Subject: [PATCH 083/204] More iota

---
 src/Columns/ColumnAggregateFunction.cpp        | 18 +++++++++---------
 src/Columns/ColumnConst.cpp                    |  8 ++++----
 src/Columns/ColumnDecimal.cpp                  | 15 +++++++--------
 src/Columns/ColumnSparse.cpp                   | 14 +++++++-------
 src/Columns/ColumnTuple.cpp                    | 12 ++++++------
 src/Columns/ColumnVector.cpp                   | 18 +++++++++---------
 src/Columns/IColumnDummy.cpp                   | 10 +++++-----
 .../tests/gtest_column_stable_permutation.cpp  | 10 +++-------
 src/Common/levenshteinDistance.cpp             |  6 +++---
 src/Functions/FunctionsStringDistance.cpp      |  4 ++--
 src/Functions/array/arraySort.cpp              |  7 +++----
 src/Functions/rowNumberInBlock.cpp             |  3 +--
 src/Interpreters/sortBlock.cpp                 |  4 ++--
 .../Transforms/PartialSortingTransform.cpp     |  9 ++++-----
 src/QueryPipeline/QueryPipelineBuilder.cpp     | 14 +++++++-------
 15 files changed, 72 insertions(+), 80 deletions(-)

diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index 0ec5db6c69d..2018015b46d 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -1,18 +1,19 @@
 #include <Columns/ColumnAggregateFunction.h>
 #include <Columns/ColumnsCommon.h>
 #include <Columns/MaskOperations.h>
-#include <Common/assert_cast.h>
-#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <IO/Operators.h>
 #include <IO/WriteBufferFromArena.h>
 #include <IO/WriteBufferFromString.h>
-#include <IO/Operators.h>
-#include <Common/FieldVisitorToString.h>
-#include <Common/SipHash.h>
+#include <Processors/Transforms/ColumnGathererTransform.h>
 #include <Common/AlignedBuffer.h>
-#include <Common/typeid_cast.h>
 #include <Common/Arena.h>
-#include <Common/WeakHash.h>
+#include <Common/FieldVisitorToString.h>
 #include <Common/HashTable/Hash.h>
+#include <Common/SipHash.h>
+#include <Common/WeakHash.h>
+#include <Common/assert_cast.h>
+#include <Common/iota.h>
+#include <Common/typeid_cast.h>
 
 
 namespace DB
@@ -626,8 +627,7 @@ void ColumnAggregateFunction::getPermutation(PermutationSortDirection /*directio
 {
     size_t s = data.size();
     res.resize(s);
-    for (size_t i = 0; i < s; ++i)
-        res[i] = i;
+    iota(res.data(), s, IColumn::Permutation::value_type(0));
 }
 
 void ColumnAggregateFunction::updatePermutation(PermutationSortDirection, PermutationSortStability,
diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp
index 10e960ea244..9aa0f5cfa49 100644
--- a/src/Columns/ColumnConst.cpp
+++ b/src/Columns/ColumnConst.cpp
@@ -2,9 +2,10 @@
 
 #include <Columns/ColumnConst.h>
 #include <Columns/ColumnsCommon.h>
-#include <Common/typeid_cast.h>
-#include <Common/WeakHash.h>
 #include <Common/HashTable/Hash.h>
+#include <Common/WeakHash.h>
+#include <Common/iota.h>
+#include <Common/typeid_cast.h>
 
 #include <base/defines.h>
 
@@ -128,8 +129,7 @@ void ColumnConst::getPermutation(PermutationSortDirection /*direction*/, Permuta
                                 size_t /*limit*/, int /*nan_direction_hint*/, Permutation & res) const
 {
     res.resize(s);
-    for (size_t i = 0; i < s; ++i)
-        res[i] = i;
+    iota(res.data(), s, IColumn::Permutation::value_type(0));
 }
 
 void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, PermutationSortStability /*stability*/,
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index baccfc69147..20fc5d8e1fe 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -1,10 +1,11 @@
-#include <Common/Exception.h>
 #include <Common/Arena.h>
-#include <Common/SipHash.h>
-#include <Common/assert_cast.h>
-#include <Common/WeakHash.h>
+#include <Common/Exception.h>
 #include <Common/HashTable/Hash.h>
 #include <Common/RadixSort.h>
+#include <Common/SipHash.h>
+#include <Common/WeakHash.h>
+#include <Common/assert_cast.h>
+#include <Common/iota.h>
 
 #include <base/sort.h>
 
@@ -163,8 +164,7 @@ void ColumnDecimal<T>::getPermutation(IColumn::PermutationSortDirection directio
     if (limit >= data_size)
         limit = 0;
 
-    for (size_t i = 0; i < data_size; ++i)
-        res[i] = i;
+    iota(res.data(), data_size, IColumn::Permutation::value_type(0));
 
     if constexpr (is_arithmetic_v<NativeT> && !is_big_int_v<NativeT>)
     {
@@ -183,8 +183,7 @@ void ColumnDecimal<T>::getPermutation(IColumn::PermutationSortDirection directio
             /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
             if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
             {
-                for (size_t i = 0; i < data_size; ++i)
-                    res[i] = i;
+                iota(res.data(), data_size, IColumn::Permutation::value_type(0));
 
                 bool try_sort = false;
 
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp
index 057c0cd7112..02e6e9e56b4 100644
--- a/src/Columns/ColumnSparse.cpp
+++ b/src/Columns/ColumnSparse.cpp
@@ -1,11 +1,12 @@
-#include <Columns/ColumnSparse.h>
-#include <Columns/ColumnsCommon.h>
 #include <Columns/ColumnCompressed.h>
+#include <Columns/ColumnSparse.h>
 #include <Columns/ColumnTuple.h>
-#include <Common/WeakHash.h>
-#include <Common/SipHash.h>
-#include <Common/HashTable/Hash.h>
+#include <Columns/ColumnsCommon.h>
 #include <Processors/Transforms/ColumnGathererTransform.h>
+#include <Common/HashTable/Hash.h>
+#include <Common/SipHash.h>
+#include <Common/WeakHash.h>
+#include <Common/iota.h>
 
 #include <algorithm>
 #include <bit>
@@ -499,8 +500,7 @@ void ColumnSparse::getPermutationImpl(IColumn::PermutationSortDirection directio
     res.resize(_size);
     if (offsets->empty())
     {
-        for (size_t i = 0; i < _size; ++i)
-            res[i] = i;
+        iota(res.data(), _size, IColumn::Permutation::value_type(0));
         return;
     }
 
diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp
index d8992125be4..356bb0493d2 100644
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@@ -1,16 +1,17 @@
 #include <Columns/ColumnTuple.h>
 
-#include <base/sort.h>
-#include <Columns/IColumnImpl.h>
 #include <Columns/ColumnCompressed.h>
+#include <Columns/IColumnImpl.h>
 #include <Core/Field.h>
-#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <DataTypes/Serializations/SerializationInfoTuple.h>
 #include <IO/Operators.h>
 #include <IO/WriteBufferFromString.h>
+#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <base/sort.h>
 #include <Common/WeakHash.h>
 #include <Common/assert_cast.h>
+#include <Common/iota.h>
 #include <Common/typeid_cast.h>
-#include <DataTypes/Serializations/SerializationInfoTuple.h>
 
 
 namespace DB
@@ -378,8 +379,7 @@ void ColumnTuple::getPermutationImpl(IColumn::PermutationSortDirection direction
 {
     size_t rows = size();
     res.resize(rows);
-    for (size_t i = 0; i < rows; ++i)
-        res[i] = i;
+    iota(res.data(), rows, IColumn::Permutation::value_type(0));
 
     if (limit >= rows)
         limit = 0;
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 37e62c76596..b1cf449dfde 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -1,24 +1,25 @@
 #include "ColumnVector.h"
 
-#include <Columns/ColumnsCommon.h>
 #include <Columns/ColumnCompressed.h>
+#include <Columns/ColumnsCommon.h>
 #include <Columns/MaskOperations.h>
 #include <Columns/RadixSortHelper.h>
-#include <Processors/Transforms/ColumnGathererTransform.h>
 #include <IO/WriteHelpers.h>
+#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <base/bit_cast.h>
+#include <base/scope_guard.h>
+#include <base/sort.h>
+#include <base/unaligned.h>
 #include <Common/Arena.h>
 #include <Common/Exception.h>
 #include <Common/HashTable/Hash.h>
 #include <Common/NaNUtils.h>
 #include <Common/RadixSort.h>
 #include <Common/SipHash.h>
-#include <Common/WeakHash.h>
 #include <Common/TargetSpecific.h>
+#include <Common/WeakHash.h>
 #include <Common/assert_cast.h>
-#include <base/sort.h>
-#include <base/unaligned.h>
-#include <base/bit_cast.h>
-#include <base/scope_guard.h>
+#include <Common/iota.h>
 
 #include <bit>
 #include <cmath>
@@ -244,8 +245,7 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
     if (limit >= data_size)
         limit = 0;
 
-    for (size_t i = 0; i < data_size; ++i)
-        res[i] = i;
+    iota(res.data(), data_size, IColumn::Permutation::value_type(0));
 
     if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
     {
diff --git a/src/Columns/IColumnDummy.cpp b/src/Columns/IColumnDummy.cpp
index 01091a87049..7c237536f94 100644
--- a/src/Columns/IColumnDummy.cpp
+++ b/src/Columns/IColumnDummy.cpp
@@ -1,7 +1,8 @@
-#include <Common/Arena.h>
-#include <Core/Field.h>
-#include <Columns/IColumnDummy.h>
 #include <Columns/ColumnsCommon.h>
+#include <Columns/IColumnDummy.h>
+#include <Core/Field.h>
+#include <Common/Arena.h>
+#include <Common/iota.h>
 
 
 namespace DB
@@ -87,8 +88,7 @@ void IColumnDummy::getPermutation(IColumn::PermutationSortDirection /*direction*
                 size_t /*limit*/, int /*nan_direction_hint*/, Permutation & res) const
 {
     res.resize(s);
-    for (size_t i = 0; i < s; ++i)
-        res[i] = i;
+    iota(res.data(), s, IColumn::Permutation::value_type(0));
 }
 
 ColumnPtr IColumnDummy::replicate(const Offsets & offsets) const
diff --git a/src/Columns/tests/gtest_column_stable_permutation.cpp b/src/Columns/tests/gtest_column_stable_permutation.cpp
index df898cffa04..0dabd4d1fc2 100644
--- a/src/Columns/tests/gtest_column_stable_permutation.cpp
+++ b/src/Columns/tests/gtest_column_stable_permutation.cpp
@@ -9,7 +9,6 @@
 #include <Columns/ColumnUnique.h>
 #include <Columns/ColumnVector.h>
 #include <Columns/ColumnsNumber.h>
-
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <DataTypes/DataTypeMap.h>
@@ -17,6 +16,7 @@
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypesNumber.h>
+#include <Common/iota.h>
 
 
 using namespace DB;
@@ -32,8 +32,7 @@ void stableGetColumnPermutation(
 
     size_t size = column.size();
     out_permutation.resize(size);
-    for (size_t i = 0; i < size; ++i)
-        out_permutation[i] = i;
+    iota(out_permutation.data(), size, IColumn::Permutation::value_type(0));
 
     std::stable_sort(
         out_permutation.begin(),
@@ -146,10 +145,7 @@ void assertColumnPermutations(ColumnCreateFunc column_create_func, ValueTransfor
 
     std::vector<std::vector<Field>> ranges(ranges_size);
     std::vector<size_t> ranges_permutations(ranges_size);
-    for (size_t i = 0; i < ranges_size; ++i)
-    {
-        ranges_permutations[i] = i;
-    }
+    iota(ranges_permutations.data(), ranges_size, IColumn::Permutation::value_type(0));
 
     IColumn::Permutation actual_permutation;
     IColumn::Permutation expected_permutation;
diff --git a/src/Common/levenshteinDistance.cpp b/src/Common/levenshteinDistance.cpp
index 9eb6c0f9050..3ab80af94bb 100644
--- a/src/Common/levenshteinDistance.cpp
+++ b/src/Common/levenshteinDistance.cpp
@@ -1,5 +1,6 @@
-#include <Common/levenshteinDistance.h>
 #include <Common/PODArray.h>
+#include <Common/iota.h>
+#include <Common/levenshteinDistance.h>
 
 namespace DB
 {
@@ -11,8 +12,7 @@ size_t levenshteinDistance(const String & lhs, const String & rhs)
 
     PODArrayWithStackMemory<size_t, 64> row(n + 1);
 
-    for (size_t i = 1; i <= n; ++i)
-        row[i] = i;
+    iota(row.data() + 1, n, size_t(1));
 
     for (size_t j = 1; j <= m; ++j)
     {
diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp
index 3098d02630a..a5e819179d6 100644
--- a/src/Functions/FunctionsStringDistance.cpp
+++ b/src/Functions/FunctionsStringDistance.cpp
@@ -6,6 +6,7 @@
 #include <Functions/FunctionsStringSimilarity.h>
 #include <Common/PODArray.h>
 #include <Common/UTF8Helpers.h>
+#include <Common/iota.h>
 
 #ifdef __SSE4_2__
 #    include <nmmintrin.h>
@@ -246,8 +247,7 @@ struct ByteEditDistanceImpl
         ResultType insertion = 0;
         ResultType deletion = 0;
 
-        for (size_t i = 0; i <= haystack_size; ++i)
-            distances0[i] = i;
+        iota(distances0.data(), haystack_size + 1, ResultType(0));
 
         for (size_t pos_needle = 0; pos_needle < needle_size; ++pos_needle)
         {
diff --git a/src/Functions/array/arraySort.cpp b/src/Functions/array/arraySort.cpp
index a853289e8cc..184b1f82280 100644
--- a/src/Functions/array/arraySort.cpp
+++ b/src/Functions/array/arraySort.cpp
@@ -1,5 +1,6 @@
-#include <Functions/array/arraySort.h>
 #include <Functions/FunctionFactory.h>
+#include <Functions/array/arraySort.h>
+#include <Common/iota.h>
 
 namespace DB
 {
@@ -55,9 +56,7 @@ ColumnPtr ArraySortImpl<positive, is_partial>::execute(
     size_t size = offsets.size();
     size_t nested_size = array.getData().size();
     IColumn::Permutation permutation(nested_size);
-
-    for (size_t i = 0; i < nested_size; ++i)
-        permutation[i] = i;
+    iota(permutation.data(), nested_size, IColumn::Permutation::value_type(0));
 
     ColumnArray::Offset current_offset = 0;
     for (size_t i = 0; i < size; ++i)
diff --git a/src/Functions/rowNumberInBlock.cpp b/src/Functions/rowNumberInBlock.cpp
index e5fe2aeb178..25c9e9c56f3 100644
--- a/src/Functions/rowNumberInBlock.cpp
+++ b/src/Functions/rowNumberInBlock.cpp
@@ -56,8 +56,7 @@ public:
         auto column = ColumnUInt64::create();
         auto & data = column->getData();
         data.resize(input_rows_count);
-        for (size_t i = 0; i < input_rows_count; ++i)
-            data[i] = i;
+        iota(data.data(), input_rows_count, UInt64(0));
 
         return column;
     }
diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp
index 89c4220ccdf..d75786f33b9 100644
--- a/src/Interpreters/sortBlock.cpp
+++ b/src/Interpreters/sortBlock.cpp
@@ -4,6 +4,7 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnTuple.h>
 #include <Functions/FunctionHelpers.h>
+#include <Common/iota.h>
 
 #ifdef __SSE2__
     #include <emmintrin.h>
@@ -155,8 +156,7 @@ void getBlockSortPermutationImpl(const Block & block, const SortDescription & de
     {
         size_t size = block.rows();
         permutation.resize(size);
-        for (size_t i = 0; i < size; ++i)
-            permutation[i] = i;
+        iota(permutation.data(), size, IColumn::Permutation::value_type(0));
 
         if (limit >= size)
             limit = 0;
diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp
index 3fc9a4e71db..e79673f6645 100644
--- a/src/Processors/Transforms/PartialSortingTransform.cpp
+++ b/src/Processors/Transforms/PartialSortingTransform.cpp
@@ -1,7 +1,8 @@
-#include <Processors/Transforms/PartialSortingTransform.h>
-#include <Interpreters/sortBlock.h>
 #include <Core/SortCursor.h>
+#include <Interpreters/sortBlock.h>
+#include <Processors/Transforms/PartialSortingTransform.h>
 #include <Common/PODArray.h>
+#include <Common/iota.h>
 
 namespace DB
 {
@@ -36,9 +37,7 @@ size_t getFilterMask(const ColumnRawPtrs & raw_block_columns, const Columns & th
     else
     {
         rows_to_compare.resize(num_rows);
-
-        for (size_t i = 0; i < num_rows; ++i)
-            rows_to_compare[i] = i;
+        iota(rows_to_compare.data(), num_rows, UInt64(0));
 
         size_t size = description.size();
         for (size_t i = 0; i < size; ++i)
diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp
index a0fabe3273c..46c6a77f60f 100644
--- a/src/QueryPipeline/QueryPipelineBuilder.cpp
+++ b/src/QueryPipeline/QueryPipelineBuilder.cpp
@@ -1,14 +1,12 @@
 #include <QueryPipeline/QueryPipelineBuilder.h>
 
-#include <Common/CurrentThread.h>
-#include <Common/typeid_cast.h>
-#include "Core/UUID.h"
 #include <Core/SortDescription.h>
+#include <Core/UUID.h>
+#include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/IJoin.h>
 #include <Interpreters/TableJoin.h>
-#include <IO/WriteHelpers.h>
 #include <Processors/ConcatProcessor.h>
 #include <Processors/DelayedPortsProcessor.h>
 #include <Processors/Executors/PipelineExecutor.h>
@@ -25,11 +23,14 @@
 #include <Processors/Transforms/ExtremesTransform.h>
 #include <Processors/Transforms/JoiningTransform.h>
 #include <Processors/Transforms/MergeJoinTransform.h>
-#include <Processors/Transforms/PasteJoinTransform.h>
 #include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
 #include <Processors/Transforms/PartialSortingTransform.h>
+#include <Processors/Transforms/PasteJoinTransform.h>
 #include <Processors/Transforms/TotalsHavingTransform.h>
 #include <QueryPipeline/narrowPipe.h>
+#include <Common/CurrentThread.h>
+#include <Common/iota.h>
+#include <Common/typeid_cast.h>
 
 namespace DB
 {
@@ -619,8 +620,7 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline)
     bool has_extremes = pipe.getExtremesPort();
     size_t num_extra_ports = (has_totals ? 1 : 0) + (has_extremes ? 1 : 0);
     IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts() + num_extra_ports);
-    for (size_t i = 0; i < delayed_streams.size(); ++i)
-        delayed_streams[i] = i;
+    iota(delayed_streams.data(), delayed_streams.size(), IProcessor::PortNumbers::value_type(0));
 
     auto * collected_processors = pipe.collected_processors;
 

From 22ef5443bb0d0c2a1e6c2fa2b178765dc3cb761b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 14:44:16 +0100
Subject: [PATCH 084/204] Move findNumeric to .cpp

---
 .../AggregateFunctionMax.cpp                  | 10 +--
 .../AggregateFunctionMin.cpp                  | 10 +--
 src/AggregateFunctions/findNumeric.cpp        | 15 -----
 .../findNumeric.h => Common/findExtreme.cpp}  | 65 ++++++++-----------
 src/Common/findExtreme.h                      | 45 +++++++++++++
 5 files changed, 82 insertions(+), 63 deletions(-)
 delete mode 100644 src/AggregateFunctions/findNumeric.cpp
 rename src/{AggregateFunctions/findNumeric.h => Common/findExtreme.cpp} (57%)
 create mode 100644 src/Common/findExtreme.h

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index a440aedb62c..3d4d23136a1 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -1,7 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
-#include <AggregateFunctions/findNumeric.h>
+#include <Common/findExtreme.h>
 
 namespace DB
 {
@@ -53,10 +53,10 @@ void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<Sing
     if (if_argument_pos >= 0) \
     { \
         const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
-        opt = findNumericMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \
+        opt = findExtremeMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \
     } \
     else \
-        opt = findNumericMax(column.getData().data(), row_begin, row_end); \
+        opt = findExtremeMax(column.getData().data(), row_begin, row_end); \
     if (opt.has_value()) \
         this->data(place).changeIfGreater(opt.value()); \
 }
@@ -140,10 +140,10 @@ void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<Sing
         auto final_flags = std::make_unique<UInt8[]>(row_end); \
         for (size_t i = row_begin; i < row_end; ++i) \
             final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
-        opt = findNumericMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
+        opt = findExtremeMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
     } \
     else \
-        opt = findNumericMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \
+        opt = findExtremeMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \
     if (opt.has_value()) \
         this->data(place).changeIfGreater(opt.value()); \
 }
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 8d5d12fa626..02d041ad12b 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -1,7 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
-#include <AggregateFunctions/findNumeric.h>
+#include <Common/findExtreme.h>
 
 
 namespace DB
@@ -54,10 +54,10 @@ public:
         if (if_argument_pos >= 0) \
         { \
             const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
-            opt = findNumericMinIf(column.getData().data(), flags.data(), row_begin, row_end); \
+            opt = findExtremeMinIf(column.getData().data(), flags.data(), row_begin, row_end); \
         } \
         else \
-            opt = findNumericMin(column.getData().data(), row_begin, row_end); \
+            opt = findExtremeMin(column.getData().data(), row_begin, row_end); \
         if (opt.has_value()) \
             this->data(place).changeIfLess(opt.value()); \
     }
@@ -141,10 +141,10 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
             auto final_flags = std::make_unique<UInt8[]>(row_end); \
             for (size_t i = row_begin; i < row_end; ++i) \
                 final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
-            opt = findNumericMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
+            opt = findExtremeMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
         } \
         else \
-            opt = findNumericMinNotNull(column.getData().data(), null_map, row_begin, row_end); \
+            opt = findExtremeMinNotNull(column.getData().data(), null_map, row_begin, row_end); \
         if (opt.has_value()) \
             this->data(place).changeIfLess(opt.value()); \
     }
diff --git a/src/AggregateFunctions/findNumeric.cpp b/src/AggregateFunctions/findNumeric.cpp
deleted file mode 100644
index bbad8c1fe3d..00000000000
--- a/src/AggregateFunctions/findNumeric.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <AggregateFunctions/findNumeric.h>
-
-namespace DB
-{
-#define INSTANTIATION(T) \
-    template std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end); \
-    template std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    template std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    template std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end); \
-    template std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    template std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
-
-FOR_BASIC_NUMERIC_TYPES(INSTANTIATION)
-#undef INSTANTIATION
-}
diff --git a/src/AggregateFunctions/findNumeric.h b/src/Common/findExtreme.cpp
similarity index 57%
rename from src/AggregateFunctions/findNumeric.h
rename to src/Common/findExtreme.cpp
index df7c325569a..e1f1e199d56 100644
--- a/src/AggregateFunctions/findNumeric.h
+++ b/src/Common/findExtreme.cpp
@@ -1,18 +1,9 @@
-#pragma once
-
 #include <DataTypes/IDataType.h>
-#include <base/defines.h>
-#include <base/types.h>
-#include <Common/Concepts.h>
 #include <Common/TargetSpecific.h>
-
-#include <algorithm>
-#include <optional>
+#include <Common/findExtreme.h>
 
 namespace DB
 {
-template <typename T>
-concept is_any_native_number = (is_any_of<T, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64>);
 
 template <is_any_native_number T>
 struct MinComparator
@@ -28,7 +19,7 @@ struct MaxComparator
 
 MULTITARGET_FUNCTION_AVX2_SSE42(
     MULTITARGET_FUNCTION_HEADER(template <is_any_native_number T, typename ComparatorClass, bool add_all_elements, bool add_if_cond_zero> static std::optional<T> NO_INLINE),
-    findNumericExtremeImpl,
+    findExtremeImpl,
     MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end)
     {
         size_t count = row_end - row_begin;
@@ -86,69 +77,67 @@ MULTITARGET_FUNCTION_AVX2_SSE42(
     }
 ))
 
-
 /// Given a vector of T finds the extreme (MIN or MAX) value
 template <is_any_native_number T, class ComparatorClass, bool add_all_elements, bool add_if_cond_zero>
 static std::optional<T>
-findNumericExtreme(const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t start, size_t end)
+findExtreme(const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t start, size_t end)
 {
 #if USE_MULTITARGET_CODE
     /// We see no benefit from using AVX512BW or AVX512F (over AVX2), so we only declare SSE and AVX2
     if (isArchSupported(TargetArch::AVX2))
-        return findNumericExtremeImplAVX2<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
+        return findExtremeImplAVX2<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
 
     if (isArchSupported(TargetArch::SSE42))
-        return findNumericExtremeImplSSE42<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
+        return findExtremeImplSSE42<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
 #endif
-    return findNumericExtremeImpl<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
+    return findExtremeImpl<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end)
+std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MinComparator<T>, true, false>(ptr, nullptr, start, end);
+    return findExtreme<T, MinComparator<T>, true, false>(ptr, nullptr, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MinComparator<T>, false, true>(ptr, condition_map, start, end);
+    return findExtreme<T, MinComparator<T>, false, true>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MinComparator<T>, false, false>(ptr, condition_map, start, end);
+    return findExtreme<T, MinComparator<T>, false, false>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end)
+std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MaxComparator<T>, true, false>(ptr, nullptr, start, end);
+    return findExtreme<T, MaxComparator<T>, true, false>(ptr, nullptr, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MaxComparator<T>, false, true>(ptr, condition_map, start, end);
+    return findExtreme<T, MaxComparator<T>, false, true>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MaxComparator<T>, false, false>(ptr, condition_map, start, end);
+    return findExtreme<T, MaxComparator<T>, false, false>(ptr, condition_map, start, end);
 }
 
 
-#define EXTERN_INSTANTIATION(T) \
-    extern template std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
-
-    FOR_BASIC_NUMERIC_TYPES(EXTERN_INSTANTIATION)
-#undef EXTERN_INSTANTIATION
+#define INSTANTIATION(T) \
+    template std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end); \
+    template std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    template std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    template std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end); \
+    template std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    template std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
 
+FOR_BASIC_NUMERIC_TYPES(INSTANTIATION)
+#undef INSTANTIATION
 }
diff --git a/src/Common/findExtreme.h b/src/Common/findExtreme.h
new file mode 100644
index 00000000000..b38c24697c0
--- /dev/null
+++ b/src/Common/findExtreme.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <base/defines.h>
+#include <base/types.h>
+#include <Common/Concepts.h>
+
+#include <algorithm>
+#include <optional>
+
+namespace DB
+{
+template <typename T> /// Types the functions below accept.
+concept is_any_native_number = (is_any_of<T, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64>);
+/// Minimum of rows [start, end) of ptr, every row considered. Callers test has_value() on all of these results.
+template <is_any_native_number T>
+std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end);
+/// Minimum over the rows of [start, end) whose condition_map byte is zero (callers pass a null map).
+template <is_any_native_number T>
+std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+/// Minimum over the rows of [start, end) whose condition_map byte is non-zero (callers pass the `if` flags).
+template <is_any_native_number T>
+std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+/// Maximum of rows [start, end) of ptr, every row considered.
+template <is_any_native_number T>
+std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end);
+/// Maximum over the rows of [start, end) whose condition_map byte is zero (callers pass a null map).
+template <is_any_native_number T>
+std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+/// Maximum over the rows of [start, end) whose condition_map byte is non-zero (callers pass the `if` flags).
+template <is_any_native_number T>
+std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+/// Explicit instantiation declarations; the matching instantiations are in findExtreme.cpp (FOR_BASIC_NUMERIC_TYPES).
+#define EXTERN_INSTANTIATION(T) \
+    extern template std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+
+    FOR_BASIC_NUMERIC_TYPES(EXTERN_INSTANTIATION)
+#undef EXTERN_INSTANTIATION
+
+}

From 5fb7f9f861ea5adaece97c1afbd4ba1283957049 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 18:09:20 +0100
Subject: [PATCH 085/204] Ignore other numeric types for now

---
 src/AggregateFunctions/AggregateFunctionMax.cpp | 14 ++++++++++++++
 src/AggregateFunctions/AggregateFunctionMin.cpp | 14 ++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index 3d4d23136a1..2577c932592 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -74,6 +74,13 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Data is neither String nor Generic here: leave the remaining types (large integers, decimals, etc)
+        /// on the plain comparison, it's faster than a permutation. NOTE(review): confirm Data is not a wrapper type.
+        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)
@@ -162,6 +169,13 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Data is neither String nor Generic here: leave the remaining types (large integers, decimals, etc)
+        /// on the plain comparison, it's faster than a permutation. NOTE(review): confirm Data is not a wrapper type.
+        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 02d041ad12b..701101e7207 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -75,6 +75,13 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Data is neither String nor Generic here: leave the remaining types (large integers, decimals, etc)
+        /// on the plain comparison, it's faster than a permutation. NOTE(review): confirm Data is not a wrapper type.
+        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)
@@ -163,6 +170,13 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Data is neither String nor Generic here: leave the remaining types (large integers, decimals, etc)
+        /// on the plain comparison, it's faster than a permutation. NOTE(review): confirm Data is not a wrapper type.
+        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)

From b95bdef09ee9474193beaba8c6eab078bb9970eb Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 29 Dec 2023 17:41:11 +0000
Subject: [PATCH 086/204] Update StorageS3 and StorageS3Cluster

---
 src/Storages/HDFS/StorageHDFS.cpp       |  17 +--
 src/Storages/S3Queue/StorageS3Queue.cpp | 117 +++++++++++++++---
 src/Storages/S3Queue/StorageS3Queue.h   |  11 +-
 src/Storages/StorageFile.cpp            |   4 +-
 src/Storages/StorageS3.cpp              | 152 ++++++++++++++++++------
 src/Storages/StorageS3.h                |  14 ++-
 src/Storages/VirtualColumnUtils.cpp     |   6 +-
 src/Storages/VirtualColumnUtils.h       |   2 +-
 8 files changed, 238 insertions(+), 85 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 9d719413c8d..fe37b2eb57a 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -417,7 +417,7 @@ public:
         uris = getPathsList(path_from_uri, uri_without_path, context);
         ActionsDAGPtr filter_dag;
         if (!uris.empty())
-             filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, uris[0].path);
+             filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
         if (filter_dag)
         {
@@ -492,7 +492,7 @@ public:
     {
         ActionsDAGPtr filter_dag;
         if (!uris.empty())
-            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, getPathFromUriAndUriWithoutPath(uris[0]).first);
+            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
         if (filter_dag)
         {
@@ -893,9 +893,6 @@ public:
         ReadFromFormatInfo info_,
         bool need_only_count_,
         std::shared_ptr<StorageHDFS> storage_,
-        // StorageSnapshotPtr storage_snapshot_,
-        // const StorageEmbeddedRocksDB & storage_,
-        // SelectQueryInfo query_info_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
@@ -907,9 +904,6 @@ public:
         , info(std::move(info_))
         , need_only_count(need_only_count_)
         , storage(std::move(storage_))
-        // , storage_snapshot(std::move(storage_snapshot_))
-        // , storage(storage_)
-        // , query_info(std::move(query_info_))
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -925,19 +919,12 @@ private:
     const bool need_only_count;
     std::shared_ptr<StorageHDFS> storage;
 
-    // StorageSnapshotPtr storage_snapshot;
-    // const StorageEmbeddedRocksDB & storage;
-    // SelectQueryInfo query_info;
     ContextPtr context;
-
     size_t max_block_size;
     size_t num_streams;
 
     std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper;
 
-    // FieldVectorPtr keys;
-    // bool all_scan = false;
-
     void createIterator(const ActionsDAG::Node * predicate);
 };
 
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 33e63d45c8d..1a6666c00d0 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -1,3 +1,6 @@
+#include "Processors/QueryPlan/QueryPlan.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "QueryPipeline/QueryPipelineBuilder.h"
 #include "config.h"
 
 #if USE_AWS_S3
@@ -204,10 +207,71 @@ bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const
     return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context_, format_settings);
 }
 
-Pipe StorageS3Queue::read(
+class ReadFromS3Queue : public SourceStepWithFilter /// Query plan step added by StorageS3Queue::read().
+{
+public:
+    std::string getName() const override { return "ReadFromS3Queue"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    ReadFromS3Queue(
+        Block sample_block,
+        ReadFromFormatInfo info_,
+        std::shared_ptr<StorageS3Queue> storage_,
+        /// No storage snapshot or column names here: StorageS3Queue::read()
+        /// has already turned them into `info_` via prepareReadingFromFormat().
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , info(std::move(info_))
+        , storage(std::move(storage_))
+        /// `storage` is needed later: createIterator() and initializePipeline()
+        /// call createFileIterator() / createSource() on it.
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    ReadFromFormatInfo info;
+    std::shared_ptr<StorageS3Queue> storage;
+    /// Query context, block size and requested stream count, as received
+    /// from StorageS3Queue::read().
+    ContextPtr context;
+    size_t max_block_size;
+    size_t num_streams;
+    /// Set by createIterator() and handed to every source in initializePipeline().
+    std::shared_ptr<StorageS3Queue::FileIterator> iterator;
+
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+/// Creates the file iterator on first use. Once it exists, later calls
+/// (and the predicate they pass) are ignored.
+void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (!iterator)
+        iterator = storage->createFileIterator(context, predicate);
+}
+
+
+/// Builds one filter DAG from the collected filter nodes and creates the
+/// file iterator with its predicate.
+void ReadFromS3Queue::applyFilters()
+{
+    auto dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    /// The predicate is the DAG's first output; nullptr when no DAG was built.
+    const ActionsDAG::Node * predicate = dag ? dag->getOutputs().at(0) : nullptr;
+    createIterator(predicate);
+}
+
+void StorageS3Queue::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
-    SelectQueryInfo & query_info,
+    SelectQueryInfo & /*query_info*/,
     ContextPtr local_context,
     QueryProcessingStage::Enum /*processed_stage*/,
     size_t max_block_size,
@@ -225,27 +289,43 @@ Pipe StorageS3Queue::read(
                         "Cannot read from {} with attached materialized views", getName());
     }
 
-    Pipes pipes;
-    const size_t adjusted_num_streams = std::min<size_t>(num_streams, s3queue_settings->s3queue_processing_threads_num);
+    auto this_ptr = std::static_pointer_cast<StorageS3Queue>(shared_from_this());
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
-    auto file_iterator = createFileIterator(local_context, query_info.query);
+    auto reading = std::make_unique<ReadFromS3Queue>(
+        read_from_format_info.source_header,
+        read_from_format_info,
+        std::move(this_ptr),
+        // storage_snapshot,
+        // column_names,
+        local_context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    Pipes pipes;
+    const size_t adjusted_num_streams = std::min<size_t>(num_streams, storage->s3queue_settings->s3queue_processing_threads_num);
+
+    createIterator(nullptr);
     for (size_t i = 0; i < adjusted_num_streams; ++i)
-        pipes.emplace_back(createSource(file_iterator, column_names, storage_snapshot, max_block_size, local_context));
-    return Pipe::unitePipes(std::move(pipes));
+        pipes.emplace_back(storage->createSource(info, iterator, max_block_size, context));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
+    const ReadFromFormatInfo & info,
     std::shared_ptr<StorageS3Queue::FileIterator> file_iterator,
-    const Names & column_names,
-    const StorageSnapshotPtr & storage_snapshot,
     size_t max_block_size,
     ContextPtr local_context)
 {
     auto configuration_snapshot = updateConfigurationAndGetCopy(local_context);
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
     auto internal_source = std::make_unique<StorageS3Source>(
-        read_from_format_info, configuration.format, getName(), local_context, format_settings,
+        info, configuration.format, getName(), local_context, format_settings,
         max_block_size,
         configuration_snapshot.request_settings,
         configuration_snapshot.compression_method,
@@ -253,7 +333,7 @@ std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
         configuration_snapshot.url.bucket,
         configuration_snapshot.url.version_id,
         configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()),
-        file_iterator, local_context->getSettingsRef().max_download_threads, false, /* query_info */ std::nullopt);
+        file_iterator, local_context->getSettingsRef().max_download_threads, false);
 
     auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable
     {
@@ -277,8 +357,8 @@ std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
     };
     auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr;
     return std::make_shared<StorageS3QueueSource>(
-        getName(), read_from_format_info.source_header, std::move(internal_source),
-        files_metadata, after_processing, file_deleter, read_from_format_info.requested_virtual_columns,
+        getName(), info.source_header, std::move(internal_source),
+        files_metadata, after_processing, file_deleter, info.requested_virtual_columns,
         local_context, shutdown_called, table_is_being_dropped, s3_queue_log, getStorageID(), log);
 }
 
@@ -375,13 +455,14 @@ bool StorageS3Queue::streamToViews()
     auto block_io = interpreter.execute();
     auto file_iterator = createFileIterator(s3queue_context, nullptr);
 
+    auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context), getVirtuals());
+
     Pipes pipes;
     pipes.reserve(s3queue_settings->s3queue_processing_threads_num);
     for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i)
     {
         auto source = createSource(
-            file_iterator, block_io.pipeline.getHeader().getNames(),
-            storage_snapshot, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context);
+            read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context);
 
         pipes.emplace_back(std::move(source));
     }
@@ -479,10 +560,10 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const
     }
 }
 
-std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, ASTPtr query)
+std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate)
 {
     auto glob_iterator = std::make_unique<StorageS3QueueSource::GlobIterator>(
-        *configuration.client, configuration.url, query, virtual_columns, local_context,
+        *configuration.client, configuration.url, predicate, virtual_columns, local_context,
         /* read_keys */nullptr, configuration.request_settings);
     return std::make_shared<FileIterator>(files_metadata, std::move(glob_iterator), shutdown_called);
 }
diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h
index f26b1175150..3d3594dc2ab 100644
--- a/src/Storages/S3Queue/StorageS3Queue.h
+++ b/src/Storages/S3Queue/StorageS3Queue.h
@@ -39,10 +39,11 @@ public:
 
     String getName() const override { return "S3Queue"; }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
-        SelectQueryInfo & query_info,
+        SelectQueryInfo & /*query_info*/,
         ContextPtr context,
         QueryProcessingStage::Enum processed_stage,
         size_t max_block_size,
@@ -57,6 +58,7 @@ public:
     zkutil::ZooKeeperPtr getZooKeeper() const;
 
 private:
+    friend class ReadFromS3Queue;
     using FileIterator = StorageS3QueueSource::FileIterator;
 
     const std::unique_ptr<S3QueueSettings> s3queue_settings;
@@ -85,11 +87,10 @@ private:
     bool supportsSubsetOfColumns(const ContextPtr & context_) const;
     bool supportsSubcolumns() const override { return true; }
 
-    std::shared_ptr<FileIterator> createFileIterator(ContextPtr local_context, ASTPtr query);
+    std::shared_ptr<FileIterator> createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate);
     std::shared_ptr<StorageS3QueueSource> createSource(
+        const ReadFromFormatInfo & info,
         std::shared_ptr<StorageS3Queue::FileIterator> file_iterator,
-        const Names & column_names,
-        const StorageSnapshotPtr & storage_snapshot,
         size_t max_block_size,
         ContextPtr local_context);
 
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index b040f452410..e4619d64ae3 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -960,8 +960,8 @@ StorageFileSource::FilesIterator::FilesIterator(
     : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_)
 {
     ActionsDAGPtr filter_dag;
-    if (!distributed_processing && !archive_info && !files.empty() && !files[0].empty())
-        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, files[0]);
+    if (!distributed_processing && !archive_info && !files.empty())
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
     if (filter_dag)
         VirtualColumnUtils::filterByPathOrFile(files, files, filter_dag, virtual_columns, context_);
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 096e2e88f91..780a2755bcf 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -159,6 +159,8 @@ public:
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
     {
+        query_configuration = storage.updateConfigurationAndGetCopy(local_context);
+        virtual_columns = storage.getVirtuals();
     }
 
 private:
@@ -166,10 +168,17 @@ private:
     StorageSnapshotPtr storage_snapshot;
     StorageS3 & storage;
     SelectQueryInfo query_info;
+    StorageS3::Configuration query_configuration;
+    NamesAndTypesList virtual_columns;
+
     ContextPtr local_context;
 
     size_t max_block_size;
     size_t num_streams;
+
+    std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper;
+
+    void createIterator(const ActionsDAG::Node * predicate);
 };
 
 
@@ -231,24 +240,14 @@ static std::vector<String> filterKeysForPartitionPruning(
     const std::vector<String> & keys,
     const String & bucket,
     const NamesAndTypesList & virtual_columns,
-    const std::vector<ActionsDAGPtr> & filter_dags,
+    const ActionsDAG::Node * predicate,
     ContextPtr context)
 {
     std::unordered_set<String> result_keys(keys.begin(), keys.end());
-    for (const auto & filter_dag : filter_dags)
-    {
-        if (result_keys.empty())
-            break;
 
-        auto block = getBlockWithVirtuals(virtual_columns, bucket, result_keys);
-
-        auto filter_actions = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), block);
-        if (!filter_actions)
-            continue;
-        VirtualColumnUtils::filterBlockWithDAG(filter_actions, block, context);
-
-        result_keys = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_key");
-    }
+    auto block = getBlockWithVirtuals(virtual_columns, bucket, result_keys);
+    VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context);
+    result_keys = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_key");
 
     LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Applied partition pruning {} from {} keys left", result_keys.size(), keys.size());
     return std::vector<String>(result_keys.begin(), result_keys.end());
@@ -309,6 +308,57 @@ public:
         fillInternalBufferAssumeLocked();
     }
 
+    /// Constructor that receives the filter as an ActionsDAG predicate.
+    Impl(
+        const S3::Client & client_,
+        const S3::URI & globbed_uri_,
+        const ActionsDAG::Node * predicate,
+        const NamesAndTypesList & virtual_columns_,
+        ContextPtr context_,
+        KeysWithInfo * read_keys_,
+        const S3Settings::RequestSettings & request_settings_,
+        std::function<void(FileProgress)> file_progress_callback_)
+        : WithContext(context_)
+        , client(client_.clone())
+        , globbed_uri(globbed_uri_)
+        , virtual_columns(virtual_columns_)
+        , read_keys(read_keys_)
+        , request_settings(request_settings_)
+        , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
+        , list_objects_scheduler(threadPoolCallbackRunner<ListObjectsOutcome>(list_objects_pool, "ListObjects"))
+        , file_progress_callback(file_progress_callback_)
+    {
+        if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
+            throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name");
+
+        const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{"));
+
+        /// No glob characters in the key: there is nothing to list, the key itself is the only entry.
+        if (key_prefix.size() == globbed_uri.key.size())
+        {
+            buffer.emplace_back(std::make_shared<KeyWithInfo>(globbed_uri.key, std::nullopt));
+            buffer_iter = buffer.begin();
+            is_finished = true;
+            return;
+        }
+
+        request.SetBucket(globbed_uri.bucket);
+        request.SetPrefix(key_prefix);
+        request.SetMaxKeys(static_cast<int>(request_settings.list_object_keys_size));
+        outcome_future = listObjectsAsync();
+
+        matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(globbed_uri.key));
+        if (!matcher->ok())
+            throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
+                "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error());
+
+        recursive = globbed_uri.key == "/**";
+        /// The filter must exist before the first batch is fetched, otherwise that batch is not filtered by `predicate`.
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        fillInternalBufferAssumeLocked();
+        is_initialized = true;
+    }
+
     KeyWithInfoPtr next()
     {
         std::lock_guard lock(mutex);
@@ -439,6 +489,15 @@ private:
 
             VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, query, virtual_columns, getContext(), filter_ast);
         }
+        else if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(temp_buffer.size());
+            for (const auto & key_with_info : temp_buffer)
+                paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key);
+
+            VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext());
+        }
 
         buffer = std::move(temp_buffer);
 
@@ -481,6 +540,7 @@ private:
     NamesAndTypesList virtual_columns;
     bool is_initialized{false};
     ASTPtr filter_ast;
+    ActionsDAGPtr filter_dag;
     std::unique_ptr<re2::RE2> matcher;
     bool recursive{false};
     bool is_finished{false};
@@ -508,6 +568,19 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
 {
 }
 
+/// Overload that takes the filter as an ActionsDAG predicate; all arguments are handed over to Impl.
+StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
+    const S3::Client & client_, const S3::URI & globbed_uri_,
+    const ActionsDAG::Node * predicate,
+    const NamesAndTypesList & virtual_columns_,
+    ContextPtr context,
+    KeysWithInfo * read_keys_,
+    const S3Settings::RequestSettings & request_settings_,
+    std::function<void(FileProgress)> file_progress_callback_)
+    : pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, predicate, virtual_columns_, std::move(context), read_keys_, request_settings_, std::move(file_progress_callback_)))
+{
+}
+
 StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next()
 {
     return pimpl->next();
@@ -646,8 +719,7 @@ StorageS3Source::StorageS3Source(
     const String & url_host_and_port_,
     std::shared_ptr<IIterator> file_iterator_,
     const size_t max_parsing_threads_,
-    bool need_only_count_,
-    std::optional<SelectQueryInfo> query_info_)
+    bool need_only_count_)
     : SourceWithKeyCondition(info.source_header, false)
     , WithContext(context_)
     , name(std::move(name_))
@@ -663,7 +735,6 @@ StorageS3Source::StorageS3Source(
     , client(client_)
     , sample_block(info.format_header)
     , format_settings(format_settings_)
-    , query_info(std::move(query_info_))
     , requested_virtual_columns(info.requested_virtual_columns)
     , file_iterator(file_iterator_)
     , max_parsing_threads(max_parsing_threads_)
@@ -1151,8 +1222,7 @@ static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
     const StorageS3::Configuration & configuration,
     bool distributed_processing,
     ContextPtr local_context,
-    ASTPtr query,
-    const std::vector<ActionsDAGPtr> & filter_dags,
+    const ActionsDAG::Node * predicate,
     const NamesAndTypesList & virtual_columns,
     StorageS3::KeysWithInfo * read_keys = nullptr,
     std::function<void(FileProgress)> file_progress_callback = {})
@@ -1165,12 +1235,12 @@ static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
     {
         /// Iterate through disclosed globs and make a source for each file
         return std::make_shared<StorageS3Source::DisclosedGlobIterator>(
-            *configuration.client, configuration.url, query, virtual_columns,
+            *configuration.client, configuration.url, predicate, virtual_columns,
             local_context, read_keys, configuration.request_settings, file_progress_callback);
     }
     else
     {
-        Strings keys = filterKeysForPartitionPruning(configuration.keys, configuration.url.bucket, virtual_columns, filter_dags, local_context);
+        Strings keys = filterKeysForPartitionPruning(configuration.keys, configuration.url.bucket, virtual_columns, predicate, local_context);
         return std::make_shared<StorageS3Source::KeysIterator>(
             *configuration.client, configuration.url.version_id, keys,
             configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback);
@@ -1217,19 +1287,34 @@ void StorageS3::read(
     query_plan.addStep(std::move(reading));
 }
 
+void ReadFromStorageS3Step::applyFilters()
+{
+    /// Build a single filter DAG from `filter_nodes`. If a DAG was built, its first output is
+    /// the predicate handed to the file iterator; otherwise the iterator is created without one.
+    auto dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, local_context);
+    const ActionsDAG::Node * predicate = dag ? dag->getOutputs().at(0) : nullptr;
+
+    createIterator(predicate);
+}
+
+void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate)
+{
+    /// The iterator is created only once: if it already exists, `predicate` is ignored.
+    if (!iterator_wrapper)
+    {
+        iterator_wrapper = createFileIterator(query_configuration, storage.distributed_processing, local_context,
+            predicate, virtual_columns, /* read_keys = */ nullptr, local_context->getFileProgressCallback());
+    }
+}
+
 void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    auto query_configuration = storage.updateConfigurationAndGetCopy(local_context);
-
     if (storage.partition_by && query_configuration.withWildcard())
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet");
 
-    auto virtual_columns = storage.getVirtuals();
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, storage.supportsSubsetOfColumns(local_context), virtual_columns);
+    createIterator(nullptr);
 
-    std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(
-        query_configuration, storage.distributed_processing, local_context, query_info.query, filter_dags,
-        virtual_columns, nullptr, local_context->getFileProgressCallback());
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, storage.supportsSubsetOfColumns(local_context), virtual_columns);
 
     size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
     if (estimated_keys_count > 1)
@@ -1264,19 +1349,12 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
             query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()),
             iterator_wrapper,
             max_parsing_threads,
-            need_only_count,
-            query_info));
+            need_only_count));
     }
 
     pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
-
-void ReadFromStorageS3Step::applyFilters()
-{
-    /// We will use filter_dags in filterKeysForPartitionPruning called from initializePipeline, nothing to do here
-}
-
 SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
 {
     auto query_configuration = updateConfigurationAndGetCopy(local_context);
@@ -1853,7 +1931,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
 {
     KeysWithInfo read_keys;
 
-    auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, {}, &read_keys);
+    auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys);
 
     ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx);
     return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx);
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index 07d965d8bb3..dd7e0edb2d9 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -85,6 +85,16 @@ public:
             const S3Settings::RequestSettings & request_settings_ = {},
             std::function<void(FileProgress)> progress_callback_ = {});
 
+        DisclosedGlobIterator(
+            const S3::Client & client_,
+            const S3::URI & globbed_uri_,
+            const ActionsDAG::Node * predicate,
+            const NamesAndTypesList & virtual_columns,
+            ContextPtr context,
+            KeysWithInfo * read_keys_ = nullptr,
+            const S3Settings::RequestSettings & request_settings_ = {},
+            std::function<void(FileProgress)> progress_callback_ = {});
+
         KeyWithInfoPtr next() override;
         size_t estimatedKeysCount() override;
 
@@ -145,8 +155,7 @@ public:
         const String & url_host_and_port,
         std::shared_ptr<IIterator> file_iterator_,
         size_t max_parsing_threads,
-        bool need_only_count_,
-        std::optional<SelectQueryInfo> query_info);
+        bool need_only_count_);
 
     ~StorageS3Source() override;
 
@@ -180,7 +189,6 @@ private:
     std::shared_ptr<const S3::Client> client;
     Block sample_block;
     std::optional<FormatSettings> format_settings;
-    std::optional<SelectQueryInfo> query_info;
 
     struct ReaderHolder
     {
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index 7690e160255..b63b4e7cca7 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -390,7 +390,7 @@ static void addPathAndFileToVirtualColumns(Block & block, const String & path, s
     block.getByName("_idx").column->assumeMutableRef().insert(idx);
 }
 
-ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example)
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns)
 {
     if (!predicate || virtual_columns.empty())
         return {};
@@ -401,10 +401,8 @@ ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, con
         if (column.name == "_file" || column.name == "_path")
             block.insert({column.type->createColumn(), column.type, column.name});
     }
-    /// Create a block with one row to construct filter
-    /// Append "idx" column as the filter result
+
     block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
-    addPathAndFileToVirtualColumns(block, path_example, 0);
     return splitFilterDagForAllowedInputs(predicate, block);
 }
 
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
index 4f9636b4213..6e1af0995cc 100644
--- a/src/Storages/VirtualColumnUtils.h
+++ b/src/Storages/VirtualColumnUtils.h
@@ -77,7 +77,7 @@ void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & pa
     sources = std::move(filtered_sources);
 }
 
-ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example);
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns);
 
 ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
 

From 47c3696a46a19674e04c6a1099e0a1429b90933e Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Fri, 29 Dec 2023 20:41:33 +0100
Subject: [PATCH 087/204] Fix build

---
 src/Interpreters/DDLTask.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index e7796c5d3a5..85bf6fec655 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -221,7 +221,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
     std::exception_ptr first_exception = nullptr;
 
     const auto maybe_secure_port = global_context->getTCPPortSecure();
-    const auto port = global_context->getTCPPort()
+    const auto port = global_context->getTCPPort();
 
     if (config_host_name)
     {

From a4ac45f2ccc4a737f20dfe5a97f8b2085f4b6a24 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 30 Dec 2023 10:56:55 +0100
Subject: [PATCH 088/204] Fix 02943_rmt_alter_metadata_merge_checksum_mismatch
 flakiness

Disable keeper fault injection for inserts to make part names deterministic. Otherwise a retried commit can produce a different part name, like here [1]:

    azat@s1:~/ch/tmp/57755 [1] {elapsed: 301s}$ zstd -cdq clickhouse-server.log.zst | grep c287beae-b56e-4193-b4c2-812ca5c52919
    2023.12.30 03:20:15.984668 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> executeQuery: (from [::1]:34846) (comment: 02943_rmt_alter_metadata_merge_checksum_mismatch.sh) insert into data_r2 (key) values  (stage: Complete)
    2023.12.30 03:20:15.987023 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> ContextAccess (default): Access granted: INSERT(key) ON test_y82swg5w.data_r2
    2023.12.30 03:20:16.007771 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c): Trying to reserve 1.00 MiB using storage policy from min volume index 0
    2023.12.30 03:20:16.008769 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> DiskLocal: Reserved 1.00 MiB on local disk `default`, having unreserved 94.58 GiB.
    2023.12.30 03:20:16.034847 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> MergedBlockOutputStream: filled checksums all_1_1_0 (state Temporary)
    2023.12.30 03:20:16.037815 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): Wrote block with ID 'all_16201685294980115408_4608068419994166055', 1 rows
    2023.12.30 03:20:16.144259 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperWithFaultInjection call FAILED: seed=8123498043031264807 func=tryMulti path=/clickhouse/tables/test_y82swg5w/data/blocks/all_16201685294980115408_4608068419994166055 code=Operation timeout message=Fault injection after operation
    2023.12.30 03:20:16.148988 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperRetriesControl: commitPart: setKeeperError: error=Operation timeout message=Fault injection after operation
    2023.12.30 03:20:16.149232 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperRetriesControl: commitPart: will retry due to error: retry_count=1/100 timeout=1ms error=Operation timeout message=Fault injection after operation
    2023.12.30 03:20:16.155148 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c): Renaming temporary part tmp_insert_all_1_1_0 to all_3_3_0 with tid (1, 1, 00000000-0000-0000-0000-000000000000).
    2023.12.30 03:20:16.161514 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperRetriesControl: commitPart: succeeded after: Iterations=2 Total keeper failures=1/100
    2023.12.30 03:20:16.166390 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> executeQuery: Read 1 rows, 4.00 B in 0.181492 sec., 5.509884733211382 rows/sec., 22.04 B/sec.
    2023.12.30 03:20:16.166950 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> MemoryTracker: Peak memory usage (for query): 3.13 MiB.
    2023.12.30 03:20:16.168314 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> TCPHandler: Processed in 0.186244473 sec.

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/57755/a12df35be4c6954e683dbea53c00599ca6a96d5d/stateless_tests_flaky_check__asan_.html

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .../02943_rmt_alter_metadata_merge_checksum_mismatch.sh       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
index 20cffcd9f65..431f59d7918 100755
--- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
@@ -34,7 +34,7 @@ function restore_failpoints()
 }
 trap restore_failpoints EXIT
 
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q "
     drop table if exists data_r1;
     drop table if exists data_r2;
 
@@ -80,7 +80,7 @@ fi
 # This will create MERGE_PARTS, on failed replica it will be fetched from source replica (since it does not have all parts to execute merge)
 $CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1" # part all_0_0_1_1
 
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q "
     insert into $success_replica (key) values (2); -- part all_2_2_0
     optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1; -- part all_0_2_2_1
     system sync replica $failed_replica pull;

From 6e2c4f04aaa17a70c37279461be84382e3c8970d Mon Sep 17 00:00:00 2001
From: Bharat Nallan Chakravarthy <bharatnc@gmail.com>
Date: Mon, 1 Jan 2024 21:31:22 -0800
Subject: [PATCH 089/204] support hints for database engine

---
 src/Databases/DatabaseFactory.cpp | 11 +++++++++--
 src/Databases/DatabaseFactory.h   | 11 ++++++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index 2c2e4030821..fc8073eac3b 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -92,9 +92,16 @@ void validate(const ASTCreateQuery & create_query)
 
 DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context)
 {
+    const auto engine_name = create.storage->engine->name;
     /// check if the database engine is a valid one before proceeding
-    if (!database_engines.contains(create.storage->engine->name))
-        throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", create.storage->engine->name);
+    if (!database_engines.contains(engine_name))
+    {
+        auto hints = getHints(engine_name);
+        if (!hints.empty())
+            throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine {}. Maybe you meant: {}", engine_name, toString(hints));
+        else
+            throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", create.storage->engine->name);
+    }
 
     /// if the engine is found (i.e. registered with the factory instance), then validate if the
     /// supplied engine arguments, settings and table overrides are valid for the engine.
diff --git a/src/Databases/DatabaseFactory.h b/src/Databases/DatabaseFactory.h
index c86eaddb29d..6b92963f46e 100644
--- a/src/Databases/DatabaseFactory.h
+++ b/src/Databases/DatabaseFactory.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Common/NamePrompter.h>
 #include <Interpreters/Context_fwd.h>
 #include <Databases/IDatabase.h>
 #include <Parsers/ASTCreateQuery.h>
@@ -24,7 +25,7 @@ static inline ValueType safeGetLiteralValue(const ASTPtr &ast, const String &eng
     return ast->as<ASTLiteral>()->value.safeGet<ValueType>();
 }
 
-class DatabaseFactory : private boost::noncopyable
+class DatabaseFactory : private boost::noncopyable, public IHints<>
 {
 public:
 
@@ -52,6 +53,14 @@ public:
 
     const DatabaseEngines & getDatabaseEngines() const { return database_engines; }
 
+    /// Names of all registered database engines (the keys of `database_engines`).
+    std::vector<String> getAllRegisteredNames() const override
+    {
+        std::vector<String> names;
+        std::transform(database_engines.begin(), database_engines.end(), std::back_inserter(names), [](const auto & engine) { return engine.first; });
+        return names;
+    }
+
 private:
     DatabaseEngines database_engines;
 

From b7cc6d4615c6ce47c695962747044fb1e49c099b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 13:08:04 +0000
Subject: [PATCH 090/204] Fixing tests.

---
 src/Storages/StorageFile.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index e4619d64ae3..12a8eed106e 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1517,7 +1517,16 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
             need_only_count));
     }
 
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    size_t output_ports = pipe.numOutputPorts();
+    const bool parallelize_output = context->getSettingsRef().parallelize_output_from_storages;
+    if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < max_num_streams)
+        pipe.resize(max_num_streams);
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 

From 0f76967f9755d3b15eb530a1a6e2dc00e653b9d9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 13:45:41 +0000
Subject: [PATCH 091/204] Add reading step to Azure.

---
 src/Storages/StorageAzureBlob.cpp | 207 ++++++++++++++++++++++++------
 src/Storages/StorageAzureBlob.h   |  24 +++-
 2 files changed, 188 insertions(+), 43 deletions(-)

diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 9564bad485c..048248ef334 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1,4 +1,6 @@
 #include <Storages/StorageAzureBlob.h>
+#include "Processors/QueryPlan/QueryPlan.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
 
 
 #if USE_AZURE_BLOB_STORAGE
@@ -666,7 +668,58 @@ private:
 
 }
 
-Pipe StorageAzureBlob::read(
+/// Query plan step for reading from StorageAzureBlob; the file iterator is built in applyFilters() or, at the latest, in initializePipeline().
+class ReadFromAzureBlob : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromAzureBlob"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+    /// `sample_block` becomes the header of the step's output stream.
+    ReadFromAzureBlob(
+        Block sample_block,
+        std::shared_ptr<StorageAzureBlob> storage_,
+        ReadFromFormatInfo info_,
+        const bool need_only_count_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::shared_ptr<StorageAzureBlob> storage;
+    ReadFromFormatInfo info;
+    const bool need_only_count;
+    ContextPtr context;
+
+    size_t max_block_size;
+    const size_t num_streams;
+    /// Set by createIterator(); the same iterator is passed to every source created in initializePipeline().
+    std::shared_ptr<StorageAzureBlobSource::IIterator> iterator_wrapper;
+    /// Creates `iterator_wrapper` unless it already exists; `predicate` may be nullptr.
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+void ReadFromAzureBlob::applyFilters()
+{
+    /// Build a single filter DAG from `filter_nodes`. If a DAG was built, its first output is
+    /// the predicate handed to the file iterator; otherwise the iterator is created without one.
+    auto dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = dag ? dag->getOutputs().at(0) : nullptr;
+
+    createIterator(predicate);
+}
+
+void StorageAzureBlob::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -678,51 +731,76 @@ Pipe StorageAzureBlob::read(
     if (partition_by && configuration.withWildcard())
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet");
 
-    Pipes pipes;
-
-    std::shared_ptr<StorageAzureBlobSource::IIterator> iterator_wrapper;
-    if (distributed_processing)
-    {
-        iterator_wrapper = std::make_shared<StorageAzureBlobSource::ReadIterator>(local_context,
-            local_context->getReadTaskCallback());
-    }
-    else if (configuration.withGlobs())
-    {
-        /// Iterate through disclosed globs and make a source for each file
-        iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
-            object_storage.get(), configuration.container, configuration.blob_path,
-            query_info.query, virtual_columns, local_context, nullptr, local_context->getFileProgressCallback());
-    }
-    else
-    {
-        iterator_wrapper = std::make_shared<StorageAzureBlobSource::KeysIterator>(
-            object_storage.get(), configuration.container, configuration.blobs_paths,
-            query_info.query, virtual_columns, local_context, nullptr, local_context->getFileProgressCallback());
-    }
+    auto this_ptr = std::static_pointer_cast<StorageAzureBlob>(shared_from_this());
 
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
     bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
         && local_context->getSettingsRef().optimize_count_from_files;
 
+    auto reading = std::make_unique<ReadFromAzureBlob>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        std::move(read_from_format_info),
+        need_only_count,
+        local_context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+/// Creates `iterator_wrapper` once; if it already exists the call does nothing and `predicate` is ignored.
+void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (iterator_wrapper)
+        return;
+    const auto & configuration = storage->configuration;
+    /// With distributed processing the iterator is built from the read task callback and `predicate` is not passed to it.
+    if (storage->distributed_processing)
+    {
+        iterator_wrapper = std::make_shared<StorageAzureBlobSource::ReadIterator>(context,
+            context->getReadTaskCallback());
+    }
+    else if (configuration.withGlobs())
+    {
+        /// Iterate through disclosed globs and make a source for each file
+        iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
+            storage->object_storage.get(), configuration.container, configuration.blob_path,
+            predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback());
+    }
+    else
+    {
+        iterator_wrapper = std::make_shared<StorageAzureBlobSource::KeysIterator>(
+            storage->object_storage.get(), configuration.container, configuration.blobs_paths,
+            predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback());
+    }
+}
+
+void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createIterator(nullptr);
+
+    const auto & configuration = storage->configuration;
+    Pipes pipes;
+
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<StorageAzureBlobSource>(
-            read_from_format_info,
+            info,
             configuration.format,
             getName(),
-            local_context,
-            format_settings,
+            context,
+            storage->format_settings,
             max_block_size,
             configuration.compression_method,
-            object_storage.get(),
+            storage->object_storage.get(),
             configuration.container,
             configuration.connection_url,
             iterator_wrapper,
-            need_only_count,
-            query_info));
+            need_only_count));
     }
 
-    return Pipe::unitePipes(std::move(pipes));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
@@ -872,6 +950,55 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
     recursive = blob_path_with_globs == "/**" ? true : false;
 }
 
+StorageAzureBlobSource::GlobIterator::GlobIterator(
+    AzureObjectStorage * object_storage_,
+    const std::string & container_,
+    String blob_path_with_globs_,
+    const ActionsDAG::Node * predicate,
+    const NamesAndTypesList & virtual_columns_,
+    ContextPtr context_,
+    RelativePathsWithMetadata * outer_blobs_,
+    std::function<void(FileProgress)> file_progress_callback_)
+    : IIterator(context_)
+    , object_storage(object_storage_)
+    , container(container_)
+    , blob_path_with_globs(std::move(blob_path_with_globs_)) /// by-value sink parameter: move, do not copy
+    , virtual_columns(virtual_columns_)
+    , outer_blobs(outer_blobs_)
+    , file_progress_callback(std::move(file_progress_callback_)) /// by-value sink parameter: move, do not copy
+{
+    /// Overload that takes the filter as an ActionsDAG predicate node; the path/file filter DAG is built here, at the end of the constructor.
+    const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{"));
+
+    /// The path contains none of '*', '?', '{', so it names a single blob and no listing is needed.
+    if (key_prefix.size() == blob_path_with_globs.size())
+    {
+        auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs);
+        blobs_with_metadata.emplace_back(
+            blob_path_with_globs,
+            object_metadata);
+        if (outer_blobs)
+            outer_blobs->emplace_back(blobs_with_metadata.back());
+        if (file_progress_callback)
+            file_progress_callback(FileProgress(0, object_metadata.size_bytes));
+        is_finished = true;
+        return;
+    }
+
+    object_storage_iterator = object_storage->iterate(key_prefix);
+
+    matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(blob_path_with_globs));
+
+    if (!matcher->ok())
+        throw Exception(
+            ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error());
+
+    recursive = blob_path_with_globs == "/**";
+
+    filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+    is_initialized = true; /// NOTE(review): presumably tells next() that filter setup is already done - confirm against next()
+}
+
 RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
 {
     std::lock_guard lock(next_mutex);
@@ -924,6 +1051,15 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
 
             VirtualColumnUtils::filterByPathOrFile(new_batch, paths, query, virtual_columns, getContext(), filter_ast);
         }
+        else if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(new_batch.size());
+            for (auto & path_with_metadata : new_batch)
+                paths.push_back(fs::path(container) / path_with_metadata.relative_path);
+
+            VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext());
+        }
 
         if (outer_blobs)
             outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end());
@@ -948,7 +1084,7 @@ StorageAzureBlobSource::KeysIterator::KeysIterator(
     AzureObjectStorage * object_storage_,
     const std::string & container_,
     const Strings & keys_,
-    ASTPtr query_,
+    const ActionsDAG::Node * predicate,
     const NamesAndTypesList & virtual_columns_,
     ContextPtr context_,
     RelativePathsWithMetadata * outer_blobs,
@@ -956,23 +1092,22 @@ StorageAzureBlobSource::KeysIterator::KeysIterator(
     : IIterator(context_)
     , object_storage(object_storage_)
     , container(container_)
-    , query(query_)
     , virtual_columns(virtual_columns_)
 {
     Strings all_keys = keys_;
 
     ASTPtr filter_ast;
     if (!all_keys.empty())
-        filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(container) / all_keys[0], getContext());
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
-    if (filter_ast)
+    if (filter_dag)
     {
         Strings paths;
         paths.reserve(all_keys.size());
         for (const auto & key : all_keys)
             paths.push_back(fs::path(container) / key);
 
-        VirtualColumnUtils::filterByPathOrFile(all_keys, paths, query, virtual_columns, getContext(), filter_ast);
+        VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext());
     }
 
     for (auto && key : all_keys)
@@ -1078,8 +1213,7 @@ StorageAzureBlobSource::StorageAzureBlobSource(
     const String & container_,
     const String & connection_url_,
     std::shared_ptr<IIterator> file_iterator_,
-    bool need_only_count_,
-    const SelectQueryInfo & query_info_)
+    bool need_only_count_)
     :ISource(info.source_header, false)
     , WithContext(context_)
     , requested_columns(info.requested_columns)
@@ -1096,7 +1230,6 @@ StorageAzureBlobSource::StorageAzureBlobSource(
     , connection_url(connection_url_)
     , file_iterator(file_iterator_)
     , need_only_count(need_only_count_)
-    , query_info(query_info_)
     , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1)
     , create_reader_scheduler(threadPoolCallbackRunner<ReaderHolder>(create_reader_pool, "AzureReader"))
 {
diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h
index bf4f6f37efe..30b91b7f85a 100644
--- a/src/Storages/StorageAzureBlob.h
+++ b/src/Storages/StorageAzureBlob.h
@@ -88,7 +88,8 @@ public:
         return name;
     }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names &,
         const StorageSnapshotPtr &,
         SelectQueryInfo &,
@@ -126,6 +127,8 @@ public:
         bool distributed_processing = false);
 
 private:
+    friend class ReadFromAzureBlob;
+
     std::string name;
     Configuration configuration;
     std::unique_ptr<AzureObjectStorage> object_storage;
@@ -162,6 +165,16 @@ public:
             RelativePathsWithMetadata * outer_blobs_,
             std::function<void(FileProgress)> file_progress_callback_ = {});
 
+        GlobIterator(
+            AzureObjectStorage * object_storage_,
+            const std::string & container_,
+            String blob_path_with_globs_,
+            const ActionsDAG::Node * predicate,
+            const NamesAndTypesList & virtual_columns_,
+            ContextPtr context_,
+            RelativePathsWithMetadata * outer_blobs_,
+            std::function<void(FileProgress)> file_progress_callback_ = {});
+
         RelativePathWithMetadata next() override;
         ~GlobIterator() override = default;
 
@@ -171,6 +184,7 @@ public:
         String blob_path_with_globs;
         ASTPtr query;
         ASTPtr filter_ast;
+        ActionsDAGPtr filter_dag;
         NamesAndTypesList virtual_columns;
 
         size_t index = 0;
@@ -212,7 +226,7 @@ public:
             AzureObjectStorage * object_storage_,
             const std::string & container_,
             const Strings & keys_,
-            ASTPtr query_,
+            const ActionsDAG::Node * predicate,
             const NamesAndTypesList & virtual_columns_,
             ContextPtr context_,
             RelativePathsWithMetadata * outer_blobs,
@@ -226,7 +240,7 @@ public:
         std::string container;
         RelativePathsWithMetadata keys;
 
-        ASTPtr query;
+        ActionsDAGPtr filter_dag;
         NamesAndTypesList virtual_columns;
 
         std::atomic<size_t> index = 0;
@@ -244,8 +258,7 @@ public:
         const String & container_,
         const String & connection_url_,
         std::shared_ptr<IIterator> file_iterator_,
-        bool need_only_count_,
-        const SelectQueryInfo & query_info_);
+        bool need_only_count_);
     ~StorageAzureBlobSource() override;
 
     Chunk generate() override;
@@ -271,7 +284,6 @@ private:
     std::shared_ptr<IIterator> file_iterator;
     bool need_only_count;
     size_t total_rows_in_file = 0;
-    SelectQueryInfo query_info;
 
     struct ReaderHolder
     {

From d3d5976d3e93a9fa7f14462ce84a1136e3437fee Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Tue, 2 Jan 2024 15:13:25 +0100
Subject: [PATCH 092/204] fix

---
 src/Interpreters/executeDDLQueryOnCluster.cpp               | 3 ++-
 .../0_stateless/02447_drop_database_replica.reference       | 6 ++++--
 tests/queries/0_stateless/02447_drop_database_replica.sh    | 6 ++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index ba7638cd83f..6b6054fdae3 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -312,7 +312,8 @@ DDLQueryStatusSource::DDLQueryStatusSource(
     , log(&Poco::Logger::get("DDLQueryStatusSource"))
 {
     auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
-    throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;
+    throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE
+        || output_mode == DistributedDDLOutputMode::NONE;
 
     if (hosts_to_wait)
     {
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference
index 8ad9008057f..7be5dde1998 100644
--- a/tests/queries/0_stateless/02447_drop_database_replica.reference
+++ b/tests/queries/0_stateless/02447_drop_database_replica.reference
@@ -13,11 +13,12 @@ t
 rdb_default	1	1	s1	r1	1
 2
 s1	r1	OK	2	0
-s2	r1	QUEUED	2	0
 s1	r2	QUEUED	2	0
+s2	r1	QUEUED	2	0
+2
 s1	r1	OK	2	0
-s2	r1	QUEUED	2	0
 s1	r2	QUEUED	2	0
+s2	r1	QUEUED	2	0
 2
 rdb_default	1	1	s1	r1	1
 rdb_default	1	2	s1	r2	0
@@ -26,4 +27,5 @@ rdb_default	1	2	s1	r2	0
 t
 t2
 t3
+t4
 rdb_default_4	1	1	s1	r1	1
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh
index 388af3fad74..d12f173f388 100755
--- a/tests/queries/0_stateless/02447_drop_database_replica.sh
+++ b/tests/queries/0_stateless/02447_drop_database_replica.sh
@@ -32,8 +32,10 @@ $CLICKHOUSE_CLIENT -q "system sync database replica $db"
 $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' and shard_num=1 and replica_num=1"
 $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
 
-$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log"
-$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log"
+# Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas
+timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>/dev/null | sort
+timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
+timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort
 
 $CLICKHOUSE_CLIENT -q "detach database $db3"
 $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"

From 3e3fed1cbe2b6b67c02a852164653a8b241c672a Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 15:18:13 +0000
Subject: [PATCH 093/204] Add reading step to URL

---
 src/Storages/HDFS/StorageHDFS.cpp       |  14 +-
 src/Storages/S3Queue/StorageS3Queue.cpp |  19 +-
 src/Storages/StorageAzureBlob.cpp       |  10 +-
 src/Storages/StorageFile.cpp            |   5 +
 src/Storages/StorageS3.cpp              |  10 +-
 src/Storages/StorageURL.cpp             | 285 +++++++++++++++++++-----
 src/Storages/StorageURL.h               |  10 +-
 src/Storages/StorageXDBC.cpp            |   5 +-
 src/Storages/StorageXDBC.h              |   3 +-
 9 files changed, 280 insertions(+), 81 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index fe37b2eb57a..c7cbaa1e561 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -1,3 +1,4 @@
+#include "Processors/Sources/NullSource.h"
 #include "config.h"
 
 #if USE_HDFS
@@ -1014,10 +1015,17 @@ void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
             context,
             max_block_size,
             iterator_wrapper,
-            need_only_count)); //,
-            //query_info));
+            need_only_count));
     }
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/)
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 1a6666c00d0..6d078e1aa1b 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -1,5 +1,6 @@
 #include "Processors/QueryPlan/QueryPlan.h"
 #include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "Processors/Sources/NullSource.h"
 #include "QueryPipeline/QueryPipelineBuilder.h"
 #include "config.h"
 
@@ -218,16 +219,12 @@ public:
         Block sample_block,
         ReadFromFormatInfo info_,
         std::shared_ptr<StorageS3Queue> storage_,
-        // StorageSnapshotPtr storage_snapshot_,
-        // Names column_names_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
         , info(std::move(info_))
         , storage(std::move(storage_))
-        // , storage_snapshot(std::move(storage_snapshot_))
-        // , column_names(std::move(column_names_))
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -237,8 +234,6 @@ public:
 private:
     ReadFromFormatInfo info;
     std::shared_ptr<StorageS3Queue> storage;
-    // StorageSnapshotPtr storage_snapshot;
-    // Names column_names;
     ContextPtr context;
     size_t max_block_size;
     size_t num_streams;
@@ -296,8 +291,6 @@ void StorageS3Queue::read(
         read_from_format_info.source_header,
         read_from_format_info,
         std::move(this_ptr),
-        // storage_snapshot,
-        // column_names,
         local_context,
         max_block_size,
         num_streams);
@@ -313,7 +306,15 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const
     createIterator(nullptr);
     for (size_t i = 0; i < adjusted_num_streams; ++i)
         pipes.emplace_back(storage->createSource(info, iterator, max_block_size, context));
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 048248ef334..defff830411 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1,6 +1,7 @@
 #include <Storages/StorageAzureBlob.h>
 #include "Processors/QueryPlan/QueryPlan.h"
 #include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "Processors/Sources/NullSource.h"
 
 
 #if USE_AZURE_BLOB_STORAGE
@@ -800,7 +801,14 @@ void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, cons
             need_only_count));
     }
 
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 12a8eed106e..18acbfc7153 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1477,6 +1477,8 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate)
 
 void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
+    createIterator(nullptr);
+
     size_t num_streams = max_num_streams;
 
     size_t files_to_read = 0;
@@ -1523,6 +1525,9 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
     if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < max_num_streams)
         pipe.resize(max_num_streams);
 
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
     for (const auto & processor : pipe.getProcessors())
         processors.emplace_back(processor);
 
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 780a2755bcf..375a367bfab 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1,3 +1,4 @@
+#include "Processors/Sources/NullSource.h"
 #include "config.h"
 #include <Common/ProfileEvents.h>
 #include "Parsers/ASTCreateQuery.h"
@@ -1352,7 +1353,14 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
             need_only_count));
     }
 
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(read_from_format_info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index d6b6f5af61c..3f88966e3d3 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -34,6 +34,8 @@
 #include <Common/ProfileEvents.h>
 #include <Common/thread_local_rng.h>
 #include <Common/logger_useful.h>
+#include "Processors/QueryPlan/QueryPlan.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
 #include <IO/ReadWriteBufferFromHTTP.h>
 #include <IO/HTTPHeaderEntries.h>
 
@@ -201,6 +203,25 @@ public:
         }
     }
 
+    /// Predicate-based variant: expands uri_ with parseRemoteDescription and, if a path/file filter DAG can be built, passes `uris` through VirtualColumnUtils::filterByPathOrFile.
+    Impl(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    {
+        uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses);
+        if (uris.empty())
+            return;
+
+        /// A null DAG leaves the expanded list untouched.
+        ActionsDAGPtr path_filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        if (!path_filter_dag)
+            return;
+        /// Only the path component of each URI is handed to the filter.
+        std::vector<String> uri_paths;
+        uri_paths.reserve(uris.size());
+        for (const auto & candidate : uris)
+            uri_paths.push_back(Poco::URI(candidate).getPath());
+        VirtualColumnUtils::filterByPathOrFile(uris, uri_paths, path_filter_dag, virtual_columns, context);
+    }
+
     String next()
     {
         size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
@@ -223,6 +244,9 @@ private:
 StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, query, virtual_columns, context)) {}
 
+StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, predicate, virtual_columns, context)) {} /// Predicate (ActionsDAG node) overload; all arguments are forwarded to the matching Impl constructor.
+
 String StorageURLSource::DisclosedGlobIterator::next()
 {
     return pimpl->next();
@@ -260,7 +284,6 @@ StorageURLSource::StorageURLSource(
     const ConnectionTimeouts & timeouts,
     CompressionMethod compression_method,
     size_t max_parsing_threads,
-    const SelectQueryInfo &,
     const HTTPHeaderEntries & headers_,
     const URIParams & params,
     bool glob_url,
@@ -874,7 +897,86 @@ bool IStorageURLBase::parallelizeOutputAfterReading(ContextPtr context) const
     return FormatFactory::instance().checkParallelizeOutputAfterReading(format_name, context);
 }
 
-Pipe IStorageURLBase::read(
+/// Query plan step for reading from a URL storage. It is added to the plan by IStorageURLBase::read
+/// (uri_options == nullptr) and by StorageURLWithFailover::read (uri_options points at the failover list).
+/// The URI iterator is built by createIterator(): applyFilters() calls it with a predicate taken from the
+/// filter DAG, and initializePipeline() calls it with nullptr, which only has an effect if nothing was built yet.
+class ReadFromURL : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromURL"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    /// sample_block is moved into the DataStream header passed to SourceStepWithFilter; the other arguments are stored in members.
+    /// uri_options_ may be nullptr; when it is not, the vector is dereferenced later by the iterator and by initializePipeline(), so it must still be alive then.
+    ReadFromURL(
+        Block sample_block,
+        std::shared_ptr<StorageURL> storage_,
+        std::vector<String> * uri_options_,
+        ReadFromFormatInfo info_,
+        const bool need_only_count_,
+        std::vector<std::pair<std::string, std::string>> read_uri_params_,
+        std::function<void(std::ostream &)> read_post_data_callback_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , uri_options(uri_options_)
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , read_uri_params(std::move(read_uri_params_))
+        , read_post_data_callback(std::move(read_post_data_callback_))
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::shared_ptr<StorageURL> storage;
+
+    /// Non-owning. When set, the iterator yields *uri_options exactly once and initializePipeline() shuffles the vector in place.
+    std::vector<String> * uri_options;
+
+    /// Passed to every StorageURLSource; info.source_header is also the header of the NullSource fallbacks.
+    ReadFromFormatInfo info;
+    /// Computed by the callers from optimize_trivial_count / empty requested columns and the optimize_count_from_files setting.
+    const bool need_only_count;
+    /// The two members below are forwarded unchanged to every StorageURLSource.
+    std::vector<std::pair<std::string, std::string>> read_uri_params;
+    std::function<void(std::ostream &)> read_post_data_callback;
+
+    ContextPtr context;
+
+    size_t max_block_size;
+    /// createIterator() sets this to 1 in its single-URI branch (the one that uses getFailoverOptions).
+    size_t num_streams;
+
+    /// Stays null until createIterator() has built an iterator; shared by all sources created in initializePipeline().
+    std::shared_ptr<StorageURLSource::IteratorWrapper> iterator_wrapper;
+    /// Result of urlWithGlobs(storage->uri), computed in createIterator().
+    bool is_url_with_globs = false;
+    /// Set when the glob iterator reports size() == 0; initializePipeline() then emits only a NullSource.
+    bool is_empty_glob = false;
+
+    /// Does nothing when iterator_wrapper is already set or is_empty_glob is true.
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+/// Builds one filter DAG from the step's filter_nodes and creates the URI iterator with the DAG's first output as predicate (nullptr when no DAG was built).
+void ReadFromURL::applyFilters()
+{
+    const auto combined_filter = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+
+    /// combined_filter lives until the end of this function, i.e. for the whole createIterator() call that receives the raw node pointer.
+    const ActionsDAG::Node * const predicate = combined_filter ? combined_filter->getOutputs().at(0) : nullptr;
+    createIterator(predicate);
+}
+
+void IStorageURLBase::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -884,16 +986,61 @@ Pipe IStorageURLBase::read(
     size_t num_streams)
 {
     auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size);
-
-    std::shared_ptr<StorageURLSource::IteratorWrapper> iterator_wrapper{nullptr};
-    bool is_url_with_globs = urlWithGlobs(uri);
-    size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements;
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
-    if (distributed_processing)
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && local_context->getSettingsRef().optimize_count_from_files;
+
+    auto read_post_data_callback = getReadPOSTDataCallback(
+        read_from_format_info.columns_description.getNamesOfPhysical(),
+        read_from_format_info.columns_description,
+        query_info,
+        local_context,
+        processed_stage,
+        max_block_size);
+
+    auto this_ptr = std::static_pointer_cast<StorageURL>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromURL>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        nullptr,
+        std::move(read_from_format_info),
+        need_only_count,
+        std::move(params),
+        std::move(read_post_data_callback),
+        local_context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromURL::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (iterator_wrapper || is_empty_glob)
+        return;
+
+    if (uri_options)
+    {
+        iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([&, done = false]() mutable
+        {
+            if (done)
+                return StorageURLSource::FailoverOptions{};
+            done = true;
+            return *uri_options;
+        });
+
+        return;
+    }
+
+    size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
+    is_url_with_globs = urlWithGlobs(storage->uri);
+
+    if (storage->distributed_processing)
     {
         iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>(
-            [callback = local_context->getReadTaskCallback(), max_addresses]()
+            [callback = context->getReadTaskCallback(), max_addresses]()
             {
                 String next_uri = callback();
                 if (next_uri.empty())
@@ -904,11 +1051,14 @@ Pipe IStorageURLBase::read(
     else if (is_url_with_globs)
     {
         /// Iterate through disclosed globs and make a source for each file
-        auto glob_iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(uri, max_addresses, query_info.query, virtual_columns, local_context);
+        auto glob_iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(storage->uri, max_addresses, predicate, storage->virtual_columns, context);
 
         /// check if we filtered out all the paths
         if (glob_iterator->size() == 0)
-            return Pipe(std::make_shared<NullSource>(read_from_format_info.source_header));
+        {
+            is_empty_glob = true;
+            return;
+        }
 
         iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([glob_iterator, max_addresses]()
         {
@@ -928,53 +1078,70 @@ Pipe IStorageURLBase::read(
             if (done)
                 return StorageURLSource::FailoverOptions{};
             done = true;
-            return getFailoverOptions(uri, max_addresses);
+            return getFailoverOptions(storage->uri, max_addresses);
         });
         num_streams = 1;
     }
+}
 
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && local_context->getSettingsRef().optimize_count_from_files;
+/// Builds the pipeline for this step: num_streams StorageURLSource instances that share iterator_wrapper, or a single NullSource when the glob expansion turned out empty.
+void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createIterator(nullptr); /// returns immediately if an iterator (or the empty-glob flag) already exists
+    if (is_empty_glob)
+    {
+        pipeline.init(Pipe(std::make_shared<NullSource>(info.source_header)));
+        return;
+    }
 
     Pipes pipes;
     pipes.reserve(num_streams);
 
-    const size_t max_threads = local_context->getSettingsRef().max_threads;
+    const size_t max_threads = context->getSettingsRef().max_threads;
     const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams);
 
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<StorageURLSource>(
-            read_from_format_info,
+            info,
             iterator_wrapper,
-            getReadMethod(),
-            getReadPOSTDataCallback(
-                read_from_format_info.columns_description.getNamesOfPhysical(),
-                read_from_format_info.columns_description,
-                query_info,
-                local_context,
-                processed_stage,
-                max_block_size),
-            format_name,
-            format_settings,
-            getName(),
-            local_context,
+            storage->getReadMethod(),
+            read_post_data_callback,
+            storage->format_name,
+            storage->format_settings,
+            storage->getName(),
+            context,
             max_block_size,
-            getHTTPTimeouts(local_context),
-            compression_method,
+            getHTTPTimeouts(context),
+            storage->compression_method,
             max_parsing_threads,
-            query_info,
-            headers,
-            params,
+            storage->headers,
+            read_uri_params,
             is_url_with_globs,
             need_only_count));
     }
 
-    return Pipe::unitePipes(std::move(pipes));
+    if (uri_options)
+        std::shuffle(uri_options->begin(), uri_options->end(), thread_local_rng); /// reorders the vector uri_options points to, in place
+
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    size_t output_ports = pipe.numOutputPorts();
+    const bool parallelize_output = context->getSettingsRef().parallelize_output_from_storages;
+    if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < num_streams)
+        pipe.resize(num_streams);
+
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header)); /// never initialise the pipeline with an empty pipe
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 
-Pipe StorageURLWithFailover::read(
+void StorageURLWithFailover::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -984,38 +1151,34 @@ Pipe StorageURLWithFailover::read(
     size_t num_streams)
 {
     auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size);
-
-    auto iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([&, done = false]() mutable
-    {
-        if (done)
-            return StorageURLSource::FailoverOptions{};
-        done = true;
-        return uri_options;
-    });
-
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
-    const size_t max_threads = local_context->getSettingsRef().max_threads;
-    const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams);
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && local_context->getSettingsRef().optimize_count_from_files;
 
-    auto pipe = Pipe(std::make_shared<StorageURLSource>(
-        read_from_format_info,
-        iterator_wrapper,
-        getReadMethod(),
-        getReadPOSTDataCallback(read_from_format_info.columns_description.getNamesOfPhysical(), read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size),
-        format_name,
-        format_settings,
-        getName(),
+    auto read_post_data_callback = getReadPOSTDataCallback(
+        read_from_format_info.columns_description.getNamesOfPhysical(),
+        read_from_format_info.columns_description,
+        query_info,
+        local_context,
+        processed_stage,
+        max_block_size);
+
+    auto this_ptr = std::static_pointer_cast<StorageURL>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromURL>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        &uri_options,
+        std::move(read_from_format_info),
+        need_only_count,
+        std::move(params),
+        std::move(read_post_data_callback),
         local_context,
         max_block_size,
-        getHTTPTimeouts(local_context),
-        compression_method,
-        max_parsing_threads,
-        query_info,
-        headers,
-        params));
-    std::shuffle(uri_options.begin(), uri_options.end(), thread_local_rng);
-    return pipe;
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
 }
 
 
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h
index 8d027025882..1b2fb97cb28 100644
--- a/src/Storages/StorageURL.h
+++ b/src/Storages/StorageURL.h
@@ -34,7 +34,8 @@ class PullingPipelineExecutor;
 class IStorageURLBase : public IStorage
 {
 public:
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -67,6 +68,8 @@ public:
         const ContextPtr & context);
 
 protected:
+    friend class ReadFromURL;
+
     IStorageURLBase(
         const String & uri_,
         ContextPtr context_,
@@ -137,6 +140,7 @@ public:
     {
     public:
         DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+        DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
 
         String next();
         size_t size();
@@ -162,7 +166,6 @@ public:
         const ConnectionTimeouts & timeouts,
         CompressionMethod compression_method,
         size_t max_parsing_threads,
-        const SelectQueryInfo & query_info,
         const HTTPHeaderEntries & headers_ = {},
         const URIParams & params = {},
         bool glob_url = false,
@@ -317,7 +320,8 @@ public:
         ContextPtr context_,
         const String & compression_method_);
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp
index a569c50835c..a274b1ba4db 100644
--- a/src/Storages/StorageXDBC.cpp
+++ b/src/Storages/StorageXDBC.cpp
@@ -102,7 +102,8 @@ std::function<void(std::ostream &)> StorageXDBC::getReadPOSTDataCallback(
     return write_body_callback;
 }
 
-Pipe StorageXDBC::read(
+void StorageXDBC::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -114,7 +115,7 @@ Pipe StorageXDBC::read(
     storage_snapshot->check(column_names);
 
     bridge_helper->startBridgeSync();
-    return IStorageURLBase::read(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
+    IStorageURLBase::read(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
 }
 
 SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h
index 1c1651cb333..fe678785dc2 100644
--- a/src/Storages/StorageXDBC.h
+++ b/src/Storages/StorageXDBC.h
@@ -19,7 +19,8 @@ namespace DB
 class StorageXDBC : public IStorageURLBase
 {
 public:
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,

From f2dfe8bddabb05194d0c380df13e8ae836fc24fa Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 2 Jan 2024 16:42:17 +0100
Subject: [PATCH 094/204] Fix build

---
 src/Storages/StorageMaterializedView.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index 458e0c9ab6b..59f1d5eee1b 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -72,7 +72,7 @@ public:
 
     StoragePtr getTargetTable() const;
     StoragePtr tryGetTargetTable() const;
-    const StorageID & getTargetTableId() const { return target_table_id; }
+    StorageID getTargetTableId() const { return target_table_id; }
 
     /// Get the virtual column of the target table;
     NamesAndTypesList getVirtuals() const override;
@@ -119,7 +119,6 @@ private:
     std::tuple<ContextMutablePtr, std::shared_ptr<ASTInsertQuery>> prepareRefresh() const;
     StorageID exchangeTargetTable(StorageID fresh_table, ContextPtr refresh_context);
 
-    StorageID getTargetTableId() const;
     void setTargetTableId(StorageID id);
     void updateTargetTableId(std::optional<String> database_name, std::optional<String> table_name);
 };

From c5ba97f0cd65012dc98a6ac1af125509d1c91c28 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Tue, 2 Jan 2024 16:38:45 +0000
Subject: [PATCH 095/204] Fix tests

---
 src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
index e3fbece37ea..883f17ab215 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
@@ -339,14 +339,14 @@ void MergeTreeReaderCompact::readData(
         ColumnPtr temp_column;
 
         auto it = columns_cache_for_subcolumns.find(name_type_in_storage.name);
-        if (it != columns_cache_for_subcolumns.end())
+        if (!name_level_for_offsets.has_value() && it != columns_cache_for_subcolumns.end())
         {
             temp_column = it->second;
             columns_cache_was_used = true;
         }
         else
         {
-            /// In case of reading onlys offset use the correct serialization for reading of the prefix
+            /// In case of reading only offsets, use the correct serialization for reading the prefix
             auto serialization = getSerializationInPart(name_type_in_storage);
             temp_column = name_type_in_storage.type->createColumn(*serialization);
 
@@ -361,7 +361,9 @@ void MergeTreeReaderCompact::readData(
             deserialize_settings.getter = buffer_getter;
             serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
             serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr);
-            columns_cache_for_subcolumns[name_type_in_storage.name] = temp_column;
+
+            if (!name_level_for_offsets.has_value())
+                columns_cache_for_subcolumns[name_type_in_storage.name] = temp_column;
         }
 
         auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column);

From 8936c8376a05030b5559364cd65ef4db5ab7af87 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:14:16 +0000
Subject: [PATCH 096/204] Use predicate in getTaskIteratorExtension.

---
 src/Storages/HDFS/StorageHDFSCluster.cpp |   4 +-
 src/Storages/HDFS/StorageHDFSCluster.h   |   2 +-
 src/Storages/IStorageCluster.cpp         | 110 ++++++++++++++++++++---
 src/Storages/IStorageCluster.h           |   7 +-
 src/Storages/StorageAzureBlobCluster.cpp |   4 +-
 src/Storages/StorageAzureBlobCluster.h   |   2 +-
 src/Storages/StorageDistributed.cpp      |  61 ++++++++++++-
 src/Storages/StorageFileCluster.cpp      |   4 +-
 src/Storages/StorageFileCluster.h        |   2 +-
 src/Storages/StorageS3Cluster.cpp        |   4 +-
 src/Storages/StorageS3Cluster.h          |   2 +-
 src/Storages/StorageURL.cpp              |   6 +-
 src/Storages/StorageURLCluster.cpp       |   4 +-
 src/Storages/StorageURLCluster.h         |   2 +-
 14 files changed, 174 insertions(+), 40 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp
index bff22936e95..2e8129b9845 100644
--- a/src/Storages/HDFS/StorageHDFSCluster.cpp
+++ b/src/Storages/HDFS/StorageHDFSCluster.cpp
@@ -79,9 +79,9 @@ void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String
 }
 
 
-RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
-    auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, query, virtual_columns, context);
+    auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, predicate, virtual_columns, context);
     auto callback = std::make_shared<std::function<String()>>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; });
     return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
 }
diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h
index 8ad4a83c5b9..7c4c41a573a 100644
--- a/src/Storages/HDFS/StorageHDFSCluster.h
+++ b/src/Storages/HDFS/StorageHDFSCluster.h
@@ -35,7 +35,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override;
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp
index 1447dad1374..c59b74255b2 100644
--- a/src/Storages/IStorageCluster.cpp
+++ b/src/Storages/IStorageCluster.cpp
@@ -2,6 +2,9 @@
 
 #include "Common/Exception.h"
 #include "Core/QueryProcessingStage.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "Processors/Sources/NullSource.h"
+#include "QueryPipeline/QueryPipelineBuilder.h"
 #include <DataTypes/DataTypeString.h>
 #include <IO/ConnectionTimeouts.h>
 #include <Interpreters/Context.h>
@@ -38,9 +41,66 @@ IStorageCluster::IStorageCluster(
 {
 }
 
+/// Plan step that reads from a cluster storage; pushed-down filters are used to build the task extension.
+class ReadFromCluster : public SourceStepWithFilter
+{
+public:
+    ReadFromCluster(
+        Block sample_block,
+        std::shared_ptr<IStorageCluster> storage_,
+        ASTPtr query_to_send_,
+        QueryProcessingStage::Enum processed_stage_,
+        ClusterPtr cluster_,
+        Poco::Logger * log_,
+        ContextPtr context_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , query_to_send(std::move(query_to_send_))
+        , processed_stage(processed_stage_)
+        , cluster(std::move(cluster_))
+        , log(log_)
+        , context(std::move(context_))
+    {
+    }
+
+    std::string getName() const override { return "ReadFromCluster"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+private:
+    void createExtension(const ActionsDAG::Node * predicate);
+    ContextPtr updateSettings(const Settings & settings);
+
+    std::shared_ptr<IStorageCluster> storage;
+    ASTPtr query_to_send;
+    QueryProcessingStage::Enum processed_stage;
+    ClusterPtr cluster;
+    Poco::Logger * log;
+    ContextPtr context;
+    std::optional<RemoteQueryExecutor::Extension> extension;
+};
+
+/// Builds the filter DAG from the pushed-down filter nodes and creates the extension from its first output.
+void ReadFromCluster::applyFilters()
+{
+    const auto dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+
+    /// Without a filter DAG a null predicate is passed on.
+    const auto * predicate = dag ? dag->getOutputs().at(0) : nullptr;
+    createExtension(predicate);
+}
+
+/// Creates the task iterator extension on the first call; later calls leave the existing one untouched.
+void ReadFromCluster::createExtension(const ActionsDAG::Node * predicate)
+{
+    const bool already_created = extension.has_value();
+    if (!already_created)
+        extension = storage->getTaskIteratorExtension(predicate, context);
+}
 
 /// The code executes on initiator
-Pipe IStorageCluster::read(
+void IStorageCluster::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -49,10 +109,10 @@ Pipe IStorageCluster::read(
     size_t /*max_block_size*/,
     size_t /*num_streams*/)
 {
-    updateBeforeRead(context);
+    storage_snapshot->check(column_names);
 
+    updateBeforeRead(context);
     auto cluster = getCluster(context);
-    auto extension = getTaskIteratorExtension(query_info.query, context);
 
     /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*)
 
@@ -70,12 +130,6 @@ Pipe IStorageCluster::read(
         query_to_send = interpreter.getQueryInfo().query->clone();
     }
 
-    const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{};
-
-    Pipes pipes;
-
-    const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
-
     if (!structure_argument_was_provided)
         addColumnsStructureToQuery(query_to_send, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), context);
 
@@ -89,7 +143,29 @@ Pipe IStorageCluster::read(
                                       /* only_replace_in_join_= */true);
     visitor.visit(query_to_send);
 
-    auto new_context = updateSettings(context, context->getSettingsRef());
+    auto this_ptr = std::static_pointer_cast<IStorageCluster>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromCluster>(
+        sample_block,
+        std::move(this_ptr),
+        std::move(query_to_send),
+        processed_stage,
+        cluster,
+        log,
+        context);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromCluster::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createExtension(nullptr);
+
+    const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{};
+    const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
+
+    Pipes pipes;
+    auto new_context = updateSettings(context->getSettingsRef());
     const auto & current_settings = new_context->getSettingsRef();
     auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
     for (const auto & shard_info : cluster->getShardsInfo())
@@ -100,7 +176,7 @@ Pipe IStorageCluster::read(
             auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                 std::vector<IConnectionPool::Entry>{try_result},
                 queryToString(query_to_send),
-                sample_block,
+                getOutputStream().header,
                 new_context,
                 /*throttler=*/nullptr,
                 scalars,
@@ -113,8 +189,14 @@ Pipe IStorageCluster::read(
         }
     }
 
-    storage_snapshot->check(column_names);
-    return Pipe::unitePipes(std::move(pipes));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(getOutputStream().header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 QueryProcessingStage::Enum IStorageCluster::getQueryProcessingStage(
@@ -129,7 +211,7 @@ QueryProcessingStage::Enum IStorageCluster::getQueryProcessingStage(
     return QueryProcessingStage::Enum::FetchColumns;
 }
 
-ContextPtr IStorageCluster::updateSettings(ContextPtr context, const Settings & settings)
+ContextPtr ReadFromCluster::updateSettings(const Settings & settings)
 {
     Settings new_settings = settings;
 
diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h
index b15ed37202a..b233f20103d 100644
--- a/src/Storages/IStorageCluster.h
+++ b/src/Storages/IStorageCluster.h
@@ -22,7 +22,8 @@ public:
         Poco::Logger * log_,
         bool structure_argument_was_provided_);
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -33,7 +34,7 @@ public:
 
     ClusterPtr getCluster(ContextPtr context) const;
     /// Query is needed for pruning by virtual columns (_file, _path)
-    virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const = 0;
+    virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const = 0;
 
     QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
 
@@ -45,8 +46,6 @@ protected:
     virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0;
 
 private:
-    ContextPtr updateSettings(ContextPtr context, const Settings & settings);
-
     Poco::Logger * log;
     String cluster_name;
     bool structure_argument_was_provided;
diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp
index b8f95458379..a6372577fb0 100644
--- a/src/Storages/StorageAzureBlobCluster.cpp
+++ b/src/Storages/StorageAzureBlobCluster.cpp
@@ -69,11 +69,11 @@ void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const S
     TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
 }
 
-RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
     auto iterator = std::make_shared<StorageAzureBlobSource::GlobIterator>(
         object_storage.get(), configuration.container, configuration.blob_path,
-        query, virtual_columns, context, nullptr);
+        predicate, virtual_columns, context, nullptr);
     auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String{ return iterator->next().relative_path; });
     return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
 }
diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h
index 2900243708c..2831b94f825 100644
--- a/src/Storages/StorageAzureBlobCluster.h
+++ b/src/Storages/StorageAzureBlobCluster.h
@@ -34,7 +34,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override;
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index a928a4daf63..c914388e55e 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -30,6 +30,7 @@
 #include <Common/randomSeed.h>
 #include <Common/formatReadable.h>
 #include <Common/CurrentMetrics.h>
+#include "Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h"
 
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
@@ -1068,15 +1069,67 @@ std::optional<QueryPipeline> StorageDistributed::distributedWriteBetweenDistribu
     return pipeline;
 }
 
+/// Builds (analysis only) the plan for a SELECT and returns the filter DAG made from the filter nodes of its
+/// SourceStepWithFilter: nullptr if the plan has no such step, LOGICAL_ERROR if it has more than one.
+static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context)
+{
+    QueryPlan plan;
+    SelectQueryOptions options;
+    options.only_analyze = true;
+    if (context->getSettingsRef().allow_experimental_analyzer)
+    {
+        InterpreterSelectQueryAnalyzer interpreter(ast, context, options);
+        plan = std::move(interpreter).extractQueryPlan();
+    }
+    else
+    {
+        InterpreterSelectWithUnionQuery interpreter(ast, context, options);
+        interpreter.buildQueryPlan(plan);
+    }
+
+    plan.optimize(QueryPlanOptimizationSettings::fromContext(context));
+
+    std::stack<QueryPlan::Node *> nodes;
+    nodes.push(plan.getRootNode());
+    SourceStepWithFilter * source = nullptr;
+    while (!nodes.empty())
+    {
+        const auto * node = nodes.top();
+        nodes.pop();
+        /// Descend into the children, otherwise only the root step would ever be inspected.
+        for (auto * child : node->children)
+            nodes.push(child);
+
+        if (auto * with_filter = dynamic_cast<SourceStepWithFilter *>(node->step.get()))
+        {
+            if (source)
+            {
+                WriteBufferFromOwnString buf;
+                plan.explainPlan(buf, {});
+                throw Exception(ErrorCodes::LOGICAL_ERROR,
+                    "Found multiple source steps for query\n{}\nPlan\n{}", queryToString(ast), buf.str());
+            }
+            source = with_filter;
+        }
+    }
+
+    if (!source)
+        return nullptr;
+    return ActionsDAG::buildFilterActionsDAG(source->getFilterNodes().nodes, {}, context);
+}
+
 
 std::optional<QueryPipeline> StorageDistributed::distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr local_context) const
 {
     const auto & settings = local_context->getSettingsRef();
-    auto & select = query.select->as<ASTSelectWithUnionQuery &>();
+
+    auto filter = getFilterFromQuery(query.select, local_context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter)
+        predicate = filter->getOutputs().at(0);
+
     /// Select query is needed for pruining on virtual columns
-    auto extension = src_storage_cluster.getTaskIteratorExtension(
-        select.list_of_selects->children.at(0)->as<ASTSelectQuery>()->clone(),
-        local_context);
+    auto extension = src_storage_cluster.getTaskIteratorExtension(predicate, local_context);
 
     auto dst_cluster = getCluster();
 
diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp
index 782c36c9819..c12124f1e07 100644
--- a/src/Storages/StorageFileCluster.cpp
+++ b/src/Storages/StorageFileCluster.cpp
@@ -71,9 +71,9 @@ void StorageFileCluster::addColumnsStructureToQuery(ASTPtr & query, const String
     TableFunctionFileCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
 }
 
-RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
-    auto iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, std::nullopt, query, virtual_columns, context);
+    auto iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, std::nullopt, predicate, virtual_columns, context);
     auto callback = std::make_shared<TaskIterator>([iter = std::move(iterator)]() mutable -> String { return iter->next(); });
     return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
 }
diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h
index e907fbad0de..a6e57c3bb4f 100644
--- a/src/Storages/StorageFileCluster.h
+++ b/src/Storages/StorageFileCluster.h
@@ -31,7 +31,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override { return virtual_columns; }
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp
index 702b1f14ae7..e1738056e9d 100644
--- a/src/Storages/StorageS3Cluster.cpp
+++ b/src/Storages/StorageS3Cluster.cpp
@@ -78,10 +78,10 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context)
     s3_configuration.update(local_context);
 }
 
-RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
     auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
-        *s3_configuration.client, s3_configuration.url, query, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
+        *s3_configuration.client, s3_configuration.url, predicate, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
 
     auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String
     {
diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h
index 81fb48d2398..c526f14834a 100644
--- a/src/Storages/StorageS3Cluster.h
+++ b/src/Storages/StorageS3Cluster.h
@@ -34,7 +34,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override;
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 3f88966e3d3..ac17f880738 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -906,7 +906,7 @@ public:
 
     ReadFromURL(
         Block sample_block,
-        std::shared_ptr<StorageURL> storage_,
+        std::shared_ptr<IStorageURLBase> storage_,
         std::vector<String> * uri_options_,
         ReadFromFormatInfo info_,
         const bool need_only_count_,
@@ -934,7 +934,7 @@ public:
     }
 
 private:
-    std::shared_ptr<StorageURL> storage;
+    std::shared_ptr<IStorageURLBase> storage;
     std::vector<String> * uri_options;
 
     // std::vector<std::string> paths;
@@ -999,7 +999,7 @@ void IStorageURLBase::read(
         processed_stage,
         max_block_size);
 
-    auto this_ptr = std::static_pointer_cast<StorageURL>(shared_from_this());
+    auto this_ptr = std::static_pointer_cast<IStorageURLBase>(shared_from_this());
 
     auto reading = std::make_unique<ReadFromURL>(
         read_from_format_info.source_header,
diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp
index c052e781877..a0b5fcd6f28 100644
--- a/src/Storages/StorageURLCluster.cpp
+++ b/src/Storages/StorageURLCluster.cpp
@@ -81,9 +81,9 @@ void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String
     TableFunctionURLCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
 }
 
-RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
-    auto iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(uri, context->getSettingsRef().glob_expansion_max_elements, query, virtual_columns, context);
+    auto iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, virtual_columns, context);
     auto callback = std::make_shared<TaskIterator>([iter = std::move(iterator)]() mutable -> String { return iter->next(); });
     return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
 }
diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h
index ddf7e6f0790..07978040029 100644
--- a/src/Storages/StorageURLCluster.h
+++ b/src/Storages/StorageURLCluster.h
@@ -34,7 +34,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override { return virtual_columns; }
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 

From c808b03e55882beaff7e9e58208546af9cd34760 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:27:33 +0000
Subject: [PATCH 097/204] Remove unneeded code

---
 src/Storages/HDFS/StorageHDFS.cpp | 33 -------------
 src/Storages/HDFS/StorageHDFS.h   |  1 -
 src/Storages/StorageAzureBlob.cpp | 64 +-----------------------
 src/Storages/StorageAzureBlob.h   | 13 -----
 src/Storages/StorageFile.cpp      | 18 -------
 src/Storages/StorageFile.h        |  8 ---
 src/Storages/StorageS3.cpp        | 82 +------------------------------
 src/Storages/StorageS3.h          | 10 ----
 src/Storages/StorageURL.cpp       | 22 ---------
 src/Storages/StorageURL.h         |  1 -
 10 files changed, 2 insertions(+), 250 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index c7cbaa1e561..430ecc7a585 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -440,34 +440,6 @@ public:
         uris_iter = uris.begin();
     }
 
-    Impl(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    {
-        const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
-        uris = getPathsList(path_from_uri, uri_without_path, context);
-        ASTPtr filter_ast;
-        if (!uris.empty())
-             filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, uris[0].path, context);
-
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(uris.size());
-            for (const auto & path_with_info : uris)
-                paths.push_back(path_with_info.path);
-
-            VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, context, filter_ast);
-        }
-        auto file_progress_callback = context->getFileProgressCallback();
-
-        for (auto & elem : uris)
-        {
-            elem.path = uri_without_path + elem.path;
-            if (file_progress_callback && elem.info)
-                file_progress_callback(FileProgress(0, elem.info->size));
-        }
-        uris_iter = uris.begin();
-    }
-
     StorageHDFS::PathWithInfo next()
     {
         std::lock_guard lock(mutex);
@@ -549,9 +521,6 @@ private:
     std::function<void(FileProgress)> file_progress_callback;
 };
 
-HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, query, virtual_columns, context)) {}
-
 HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, predicate, virtual_columns, context)) {}
 
@@ -577,7 +546,6 @@ HDFSSource::HDFSSource(
     UInt64 max_block_size_,
     std::shared_ptr<IteratorWrapper> file_iterator_,
     bool need_only_count_)
-    //const SelectQueryInfo & query_info_)
     : ISource(info.source_header, false)
     , WithContext(context_)
     , storage(std::move(storage_))
@@ -588,7 +556,6 @@ HDFSSource::HDFSSource(
     , file_iterator(file_iterator_)
     , columns_description(info.columns_description)
     , need_only_count(need_only_count_)
-    //, query_info(query_info_)
 {
     initialize();
 }
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index cee1b674eb7..9d9a857bf4e 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -115,7 +115,6 @@ public:
     class DisclosedGlobIterator
     {
         public:
-            DisclosedGlobIterator(const String & uri_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             StorageHDFS::PathWithInfo next();
         private:
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index defff830411..294a65c067f 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -911,53 +911,6 @@ bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const
     return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context);
 }
 
-StorageAzureBlobSource::GlobIterator::GlobIterator(
-    AzureObjectStorage * object_storage_,
-    const std::string & container_,
-    String blob_path_with_globs_,
-    ASTPtr query_,
-    const NamesAndTypesList & virtual_columns_,
-    ContextPtr context_,
-    RelativePathsWithMetadata * outer_blobs_,
-    std::function<void(FileProgress)> file_progress_callback_)
-    : IIterator(context_)
-    , object_storage(object_storage_)
-    , container(container_)
-    , blob_path_with_globs(blob_path_with_globs_)
-    , query(query_)
-    , virtual_columns(virtual_columns_)
-    , outer_blobs(outer_blobs_)
-    , file_progress_callback(file_progress_callback_)
-{
-
-    const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{"));
-
-    /// We don't have to list bucket, because there is no asterisks.
-    if (key_prefix.size() == blob_path_with_globs.size())
-    {
-        auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs);
-        blobs_with_metadata.emplace_back(
-            blob_path_with_globs,
-            object_metadata);
-        if (outer_blobs)
-            outer_blobs->emplace_back(blobs_with_metadata.back());
-        if (file_progress_callback)
-            file_progress_callback(FileProgress(0, object_metadata.size_bytes));
-        is_finished = true;
-        return;
-    }
-
-    object_storage_iterator = object_storage->iterate(key_prefix);
-
-    matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(blob_path_with_globs));
-
-    if (!matcher->ok())
-        throw Exception(
-            ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error());
-
-    recursive = blob_path_with_globs == "/**" ? true : false;
-}
-
 StorageAzureBlobSource::GlobIterator::GlobIterator(
     AzureObjectStorage * object_storage_,
     const std::string & container_,
@@ -1004,7 +957,6 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
     recursive = blob_path_with_globs == "/**" ? true : false;
 
     filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
-    is_initialized = true;
 }
 
 RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
@@ -1044,22 +996,8 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
         }
 
         index = 0;
-        if (!is_initialized)
-        {
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(container) / new_batch.front().relative_path, getContext());
-            is_initialized = true;
-        }
 
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(new_batch.size());
-            for (auto & path_with_metadata : new_batch)
-                paths.push_back(fs::path(container) / path_with_metadata.relative_path);
-
-            VirtualColumnUtils::filterByPathOrFile(new_batch, paths, query, virtual_columns, getContext(), filter_ast);
-        }
-        else if (filter_dag)
+        if (filter_dag)
         {
             std::vector<String> paths;
             paths.reserve(new_batch.size());
diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h
index 30b91b7f85a..cc908fa3215 100644
--- a/src/Storages/StorageAzureBlob.h
+++ b/src/Storages/StorageAzureBlob.h
@@ -155,16 +155,6 @@ public:
     class GlobIterator : public IIterator
     {
     public:
-        GlobIterator(
-            AzureObjectStorage * object_storage_,
-            const std::string & container_,
-            String blob_path_with_globs_,
-            ASTPtr query_,
-            const NamesAndTypesList & virtual_columns_,
-            ContextPtr context_,
-            RelativePathsWithMetadata * outer_blobs_,
-            std::function<void(FileProgress)> file_progress_callback_ = {});
-
         GlobIterator(
             AzureObjectStorage * object_storage_,
             const std::string & container_,
@@ -182,8 +172,6 @@ public:
         AzureObjectStorage * object_storage;
         std::string container;
         String blob_path_with_globs;
-        ASTPtr query;
-        ASTPtr filter_ast;
         ActionsDAGPtr filter_dag;
         NamesAndTypesList virtual_columns;
 
@@ -198,7 +186,6 @@ public:
 
         void createFilterAST(const String & any_key);
         bool is_finished = false;
-        bool is_initialized = false;
         std::mutex next_mutex;
 
         std::function<void(FileProgress)> file_progress_callback;
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 18acbfc7153..47d29c3c501 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -932,24 +932,6 @@ static std::chrono::seconds getLockTimeout(ContextPtr context)
 
 using StorageFilePtr = std::shared_ptr<StorageFile>;
 
-
-StorageFileSource::FilesIterator::FilesIterator(
-    const Strings & files_,
-    std::optional<StorageFile::ArchiveInfo> archive_info_,
-    ASTPtr query,
-    const NamesAndTypesList & virtual_columns,
-    ContextPtr context_,
-    bool distributed_processing_)
-    : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_)
-{
-    ASTPtr filter_ast;
-    if (!distributed_processing && !archive_info && !files.empty() && !files[0].empty())
-        filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, files[0], context_);
-
-    if (filter_ast)
-        VirtualColumnUtils::filterByPathOrFile(files, files, query, virtual_columns, context_, filter_ast);
-}
-
 StorageFileSource::FilesIterator::FilesIterator(
     const Strings & files_,
     std::optional<StorageFile::ArchiveInfo> archive_info_,
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index ecb9e01b862..4f8cbfd4795 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -193,14 +193,6 @@ public:
     class FilesIterator
     {
     public:
-        explicit FilesIterator(
-            const Strings & files_,
-            std::optional<StorageFile::ArchiveInfo> archive_info_,
-            ASTPtr query,
-            const NamesAndTypesList & virtual_columns,
-            ContextPtr context_,
-            bool distributed_processing_ = false);
-
         explicit FilesIterator(
             const Strings & files_,
             std::optional<StorageFile::ArchiveInfo> archive_info_,
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 375a367bfab..88ea57e21cc 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -260,55 +260,6 @@ using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
 class StorageS3Source::DisclosedGlobIterator::Impl : WithContext
 {
 public:
-    Impl(
-        const S3::Client & client_,
-        const S3::URI & globbed_uri_,
-        ASTPtr & query_,
-        const NamesAndTypesList & virtual_columns_,
-        ContextPtr context_,
-        KeysWithInfo * read_keys_,
-        const S3Settings::RequestSettings & request_settings_,
-        std::function<void(FileProgress)> file_progress_callback_)
-        : WithContext(context_)
-        , client(client_.clone())
-        , globbed_uri(globbed_uri_)
-        , query(query_)
-        , virtual_columns(virtual_columns_)
-        , read_keys(read_keys_)
-        , request_settings(request_settings_)
-        , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
-        , list_objects_scheduler(threadPoolCallbackRunner<ListObjectsOutcome>(list_objects_pool, "ListObjects"))
-        , file_progress_callback(file_progress_callback_)
-    {
-        if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
-            throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name");
-
-        const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{"));
-
-        /// We don't have to list bucket, because there is no asterisks.
-        if (key_prefix.size() == globbed_uri.key.size())
-        {
-            buffer.emplace_back(std::make_shared<KeyWithInfo>(globbed_uri.key, std::nullopt));
-            buffer_iter = buffer.begin();
-            is_finished = true;
-            return;
-        }
-
-        request.SetBucket(globbed_uri.bucket);
-        request.SetPrefix(key_prefix);
-        request.SetMaxKeys(static_cast<int>(request_settings.list_object_keys_size));
-
-        outcome_future = listObjectsAsync();
-
-        matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(globbed_uri.key));
-        if (!matcher->ok())
-            throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
-                "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error());
-
-        recursive = globbed_uri.key == "/**" ? true : false;
-        fillInternalBufferAssumeLocked();
-    }
-
     Impl(
         const S3::Client & client_,
         const S3::URI & globbed_uri_,
@@ -357,7 +308,6 @@ public:
         fillInternalBufferAssumeLocked();
 
         filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
-        is_initialized = true;
     }
 
     KeyWithInfoPtr next()
@@ -475,22 +425,7 @@ private:
             return;
         }
 
-        if (!is_initialized)
-        {
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(globbed_uri.bucket) / temp_buffer.front()->key, getContext());
-            is_initialized = true;
-        }
-
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(temp_buffer.size());
-            for (const auto & key_with_info : temp_buffer)
-                paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key);
-
-            VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, query, virtual_columns, getContext(), filter_ast);
-        }
-        else if (filter_dag)
+        if (filter_dag)
         {
             std::vector<String> paths;
             paths.reserve(temp_buffer.size());
@@ -539,8 +474,6 @@ private:
     S3::URI globbed_uri;
     ASTPtr query;
     NamesAndTypesList virtual_columns;
-    bool is_initialized{false};
-    ASTPtr filter_ast;
     ActionsDAGPtr filter_dag;
     std::unique_ptr<re2::RE2> matcher;
     bool recursive{false};
@@ -556,19 +489,6 @@ private:
     std::function<void(FileProgress)> file_progress_callback;
 };
 
-StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
-    const S3::Client & client_,
-    const S3::URI & globbed_uri_,
-    ASTPtr query,
-    const NamesAndTypesList & virtual_columns_,
-    ContextPtr context,
-    KeysWithInfo * read_keys_,
-    const S3Settings::RequestSettings & request_settings_,
-    std::function<void(FileProgress)> file_progress_callback_)
-    : pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, query, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_))
-{
-}
-
 StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
     const S3::Client & client_,
     const S3::URI & globbed_uri_,
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index dd7e0edb2d9..f63bf3a8e90 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -75,16 +75,6 @@ public:
     class DisclosedGlobIterator : public IIterator
     {
     public:
-        DisclosedGlobIterator(
-            const S3::Client & client_,
-            const S3::URI & globbed_uri_,
-            ASTPtr query,
-            const NamesAndTypesList & virtual_columns,
-            ContextPtr context,
-            KeysWithInfo * read_keys_ = nullptr,
-            const S3Settings::RequestSettings & request_settings_ = {},
-            std::function<void(FileProgress)> progress_callback_ = {});
-
         DisclosedGlobIterator(
             const S3::Client & client_,
             const S3::URI & globbed_uri_,
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index ac17f880738..6ed535a0317 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -184,25 +184,6 @@ namespace
 class StorageURLSource::DisclosedGlobIterator::Impl
 {
 public:
-    Impl(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    {
-        uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses);
-
-        ASTPtr filter_ast;
-        if (!uris.empty())
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, Poco::URI(uris[0]).getPath(), context);
-
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(uris.size());
-            for (const auto & uri : uris)
-                paths.push_back(Poco::URI(uri).getPath());
-
-            VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, context, filter_ast);
-        }
-    }
-
     Impl(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     {
         uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses);
@@ -241,9 +222,6 @@ private:
     std::atomic_size_t index = 0;
 };
 
-StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, query, virtual_columns, context)) {}
-
 StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, predicate, virtual_columns, context)) {}
 
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h
index 1b2fb97cb28..c9e6f6311bf 100644
--- a/src/Storages/StorageURL.h
+++ b/src/Storages/StorageURL.h
@@ -139,7 +139,6 @@ public:
     class DisclosedGlobIterator
     {
     public:
-        DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
         DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
 
         String next();

From 1b20ce51624f996fc3995c5c511ecce2e6de872a Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:50:06 +0000
Subject: [PATCH 098/204] Cleanup

---
 src/Storages/HDFS/StorageHDFS.cpp       |  2 +-
 src/Storages/IStorageCluster.cpp        | 16 ++++----
 src/Storages/S3Queue/StorageS3Queue.cpp | 12 +++---
 src/Storages/StorageAzureBlob.cpp       |  7 ++--
 src/Storages/StorageDistributed.cpp     |  2 +-
 src/Storages/StorageFile.cpp            | 50 +++++++------------------
 src/Storages/StorageS3.cpp              |  6 +--
 src/Storages/StorageURL.cpp             |  4 +-
 8 files changed, 38 insertions(+), 61 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 430ecc7a585..c1c0f7d76bd 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -1,4 +1,3 @@
-#include "Processors/Sources/NullSource.h"
 #include "config.h"
 
 #if USE_HDFS
@@ -16,6 +15,7 @@
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/Sources/NullSource.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Processors/QueryPlan/SourceStepWithFilter.h>
 
diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp
index c59b74255b2..6f42d8f855c 100644
--- a/src/Storages/IStorageCluster.cpp
+++ b/src/Storages/IStorageCluster.cpp
@@ -1,10 +1,7 @@
-#include "Storages/IStorageCluster.h"
+#include <Storages/IStorageCluster.h>
 
-#include "Common/Exception.h"
-#include "Core/QueryProcessingStage.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
-#include "Processors/Sources/NullSource.h"
-#include "QueryPipeline/QueryPipelineBuilder.h"
+#include <Common/Exception.h>
+#include <Core/QueryProcessingStage.h>
 #include <DataTypes/DataTypeString.h>
 #include <IO/ConnectionTimeouts.h>
 #include <Interpreters/Context.h>
@@ -14,11 +11,14 @@
 #include <Interpreters/AddDefaultDatabaseVisitor.h>
 #include <Interpreters/TranslateQualifiedNamesVisitor.h>
 #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
+#include <Parsers/queryToString.h>
+#include <Processors/Sources/NullSource.h>
+#include <Processors/Sources/RemoteSource.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 #include <QueryPipeline/narrowPipe.h>
 #include <QueryPipeline/Pipe.h>
-#include <Processors/Sources/RemoteSource.h>
 #include <QueryPipeline/RemoteQueryExecutor.h>
-#include <Parsers/queryToString.h>
+#include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Storages/IStorage.h>
 #include <Storages/SelectQueryInfo.h>
 #include <Storages/StorageDictionary.h>
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 6d078e1aa1b..bc33e8cf2a9 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -1,7 +1,3 @@
-#include "Processors/QueryPlan/QueryPlan.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
-#include "Processors/Sources/NullSource.h"
-#include "QueryPipeline/QueryPipelineBuilder.h"
 #include "config.h"
 
 #if USE_AWS_S3
@@ -10,11 +6,14 @@
 #include <IO/CompressionMethod.h>
 #include <Formats/FormatFactory.h>
 #include <Interpreters/InterpreterInsertQuery.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTInsertQuery.h>
 #include <Processors/Executors/CompletedPipelineExecutor.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
 #include <Processors/ISource.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTInsertQuery.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
+#include <Processors/Sources/NullSource.h>
 #include <Storages/S3Queue/S3QueueTableMetadata.h>
 #include <Storages/S3Queue/StorageS3Queue.h>
 #include <Storages/S3Queue/S3QueueFilesMetadata.h>
@@ -24,6 +23,7 @@
 #include <Storages/StorageSnapshot.h>
 #include <Storages/VirtualColumnUtils.h>
 #include <Storages/prepareReadingFromFormat.h>
+#include <QueryPipeline/QueryPipelineBuilder.h>
 #include <filesystem>
 
 
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 294a65c067f..4f63b144f43 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1,8 +1,4 @@
 #include <Storages/StorageAzureBlob.h>
-#include "Processors/QueryPlan/QueryPlan.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
-#include "Processors/Sources/NullSource.h"
-
 
 #if USE_AZURE_BLOB_STORAGE
 #include <Formats/FormatFactory.h>
@@ -24,6 +20,9 @@
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Formats/IOutputFormat.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/Sources/NullSource.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <Storages/StorageFactory.h>
 #include <Storages/StorageSnapshot.h>
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index c914388e55e..7ef2ff08827 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -30,7 +30,6 @@
 #include <Common/randomSeed.h>
 #include <Common/formatReadable.h>
 #include <Common/CurrentMetrics.h>
-#include "Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h"
 
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
@@ -92,6 +91,7 @@
 #include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
 #include <Processors/QueryPlan/ReadFromPreparedSource.h>
 #include <Processors/QueryPlan/ExpressionStep.h>
+#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
 #include <Processors/Sources/NullSource.h>
 #include <Processors/Sources/RemoteSource.h>
 #include <Processors/Sinks/EmptySink.h>
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 47d29c3c501..60e06291200 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -38,6 +38,8 @@
 #include <Processors/Sources/NullSource.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <Common/escapeForFileName.h>
 #include <Common/typeid_cast.h>
@@ -45,8 +47,6 @@
 #include <Common/filesystemHelpers.h>
 #include <Common/logger_useful.h>
 #include <Common/ProfileEvents.h>
-#include <Processors/QueryPlan/QueryPlan.h>
-#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <QueryPipeline/Pipe.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
@@ -1330,25 +1330,15 @@ public:
     ReadFromFile(
         Block sample_block,
         std::shared_ptr<StorageFile> storage_,
-        std::vector<std::string> paths_,
-        std::optional<StorageFile::ArchiveInfo> archive_info_,
-        NamesAndTypesList virtual_columns_,
-        bool distributed_processing_,
         ReadFromFormatInfo info_,
         const bool need_only_count_,
-        size_t total_bytes_to_read_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
         , storage(std::move(storage_))
-        , paths(std::move(paths_))
-        , archive_info(std::move(archive_info_))
-        , virtual_columns(std::move(virtual_columns_))
-        , distributed_processing(distributed_processing_)
         , info(std::move(info_))
         , need_only_count(need_only_count_)
-        , total_bytes_to_read(total_bytes_to_read_)
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , max_num_streams(num_streams_)
@@ -1357,28 +1347,15 @@ public:
 
 private:
     std::shared_ptr<StorageFile> storage;
-
-    std::vector<std::string> paths;
-    std::optional<StorageFile::ArchiveInfo> archive_info;
-
-    NamesAndTypesList virtual_columns;
-    const bool distributed_processing;
-
     ReadFromFormatInfo info;
     const bool need_only_count;
 
-    size_t total_bytes_to_read;
-
     ContextPtr context;
-
     size_t max_block_size;
     const size_t max_num_streams;
 
     std::shared_ptr<StorageFileSource::FilesIterator> files_iterator;
 
-    // FieldVectorPtr keys;
-    // bool all_scan = false;
-
     void createIterator(const ActionsDAG::Node * predicate);
 };
 
@@ -1435,13 +1412,8 @@ void StorageFile::read(
     auto reading = std::make_unique<ReadFromFile>(
         read_from_format_info.source_header,
         std::move(this_ptr),
-        paths,
-        archive_info,
-        virtual_columns,
-        distributed_processing,
         std::move(read_from_format_info),
         need_only_count,
-        total_bytes_to_read,
         context,
         max_block_size,
         num_streams);
@@ -1454,7 +1426,13 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate)
     if (files_iterator)
         return;
 
-    files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, archive_info, predicate, virtual_columns, context, distributed_processing);
+    files_iterator = std::make_shared<StorageFileSource::FilesIterator>(
+        storage->paths,
+        storage->archive_info,
+        predicate,
+        storage->virtual_columns,
+        context,
+        storage->distributed_processing);
 }
 
 void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
@@ -1464,10 +1442,10 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
     size_t num_streams = max_num_streams;
 
     size_t files_to_read = 0;
-    if (archive_info)
-        files_to_read = archive_info->paths_to_archives.size();
+    if (storage->archive_info)
+        files_to_read = storage->archive_info->paths_to_archives.size();
     else
-        files_to_read = paths.size();
+        files_to_read = storage->paths.size();
 
     if (max_num_streams > files_to_read)
         num_streams = files_to_read;
@@ -1478,8 +1456,8 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
     /// Set total number of bytes to process. For progress bar.
     auto progress_callback = context->getFileProgressCallback();
 
-    if (progress_callback && !archive_info)
-        progress_callback(FileProgress(0, total_bytes_to_read));
+    if (progress_callback && !storage->archive_info)
+        progress_callback(FileProgress(0, storage->total_bytes_to_read));
 
     for (size_t i = 0; i < num_streams; ++i)
     {
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 88ea57e21cc..3e1af2df4b0 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1,7 +1,4 @@
-#include "Processors/Sources/NullSource.h"
 #include "config.h"
-#include <Common/ProfileEvents.h>
-#include "Parsers/ASTCreateQuery.h"
 
 #if USE_AWS_S3
 
@@ -17,6 +14,7 @@
 
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTInsertQuery.h>
+#include <Parsers/ASTCreateQuery.h>
 
 #include <Storages/StorageFactory.h>
 #include <Storages/StorageS3.h>
@@ -43,6 +41,7 @@
 #include <Processors/Formats/IOutputFormat.h>
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/Sources/NullSource.h>
 #include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 
@@ -58,6 +57,7 @@
 #include <Common/parseGlobs.h>
 #include <Common/quoteString.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/ProfileEvents.h>
 
 #include <Processors/ISource.h>
 #include <Processors/Sinks/SinkToStorage.h>
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 6ed535a0317..3389ed1db86 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -26,6 +26,8 @@
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <Common/ThreadStatus.h>
 #include <Common/parseRemoteDescription.h>
@@ -34,8 +36,6 @@
 #include <Common/ProfileEvents.h>
 #include <Common/thread_local_rng.h>
 #include <Common/logger_useful.h>
-#include "Processors/QueryPlan/QueryPlan.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
 #include <IO/ReadWriteBufferFromHTTP.h>
 #include <IO/HTTPHeaderEntries.h>
 

From 4f99a8bc1f7f8a3d5e3ad9188ae649caefec8ed5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:54:20 +0000
Subject: [PATCH 099/204] Remove more unused code.

---
 src/Storages/VirtualColumnUtils.cpp | 38 -----------------------------
 src/Storages/VirtualColumnUtils.h   | 19 ---------------
 2 files changed, 57 deletions(-)

diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index b63b4e7cca7..e54528bbf01 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -424,44 +424,6 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
     return block.getByName("_idx").column;
 }
 
-ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context)
-{
-    if (!query || virtual_columns.empty())
-        return {};
-
-    Block block;
-    for (const auto & column : virtual_columns)
-    {
-        if (column.name == "_file" || column.name == "_path")
-            block.insert({column.type->createColumn(), column.type, column.name});
-    }
-    /// Create a block with one row to construct filter
-    /// Append "idx" column as the filter result
-    block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
-    addPathAndFileToVirtualColumns(block, path_example, 0);
-    ASTPtr filter_ast;
-    prepareFilterBlockWithQuery(query, context, block, filter_ast);
-    return filter_ast;
-}
-
-ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast)
-{
-    Block block;
-    for (const auto & column : virtual_columns)
-    {
-        if (column.name == "_file" || column.name == "_path")
-            block.insert({column.type->createColumn(), column.type, column.name});
-    }
-    block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
-
-    for (size_t i = 0; i != paths.size(); ++i)
-        addPathAndFileToVirtualColumns(block, paths[i], i);
-
-    filterBlockWithQuery(query, block, context, filter_ast);
-
-    return block.getByName("_idx").column;
-}
-
 void addRequestedPathFileAndSizeVirtualsToChunk(
     Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional<size_t> size, const String * filename)
 {
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
index 6e1af0995cc..3c07e33a177 100644
--- a/src/Storages/VirtualColumnUtils.h
+++ b/src/Storages/VirtualColumnUtils.h
@@ -58,25 +58,6 @@ auto extractSingleValueFromBlock(const Block & block, const String & name)
 
 NamesAndTypesList getPathFileAndSizeVirtualsForStorage(NamesAndTypesList storage_columns);
 
-ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context);
-
-ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast);
-
-template <typename T>
-void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast)
-{
-    auto indexes_column = getFilterByPathAndFileIndexes(paths, query, virtual_columns, context, filter_ast);
-    const auto & indexes = typeid_cast<const ColumnUInt64 &>(*indexes_column).getData();
-    if (indexes.size() == sources.size())
-        return;
-
-    std::vector<T> filtered_sources;
-    filtered_sources.reserve(indexes.size());
-    for (auto index : indexes)
-        filtered_sources.emplace_back(std::move(sources[index]));
-    sources = std::move(filtered_sources);
-}
-
 ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns);
 
 ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context);

From 9c25cb6692cfdcf410c9d735a77e9c2eb01fff78 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 18:08:04 +0000
Subject: [PATCH 100/204] Cleanup

---
 src/Storages/HDFS/StorageHDFS.cpp | 25 ++++---------------------
 src/Storages/HDFS/StorageHDFS.h   |  3 +--
 src/Storages/StorageURL.cpp       | 16 ----------------
 3 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index c1c0f7d76bd..974b2bb68cf 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -411,7 +411,6 @@ ColumnsDescription StorageHDFS::getTableStructureFromData(
 class HDFSSource::DisclosedGlobIterator::Impl
 {
 public:
-
     Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     {
         const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
@@ -854,10 +853,6 @@ public:
 
     ReadFromHDFS(
         Block sample_block,
-        std::vector<String> uris_,
-        bool distributed_processing_,
-        NamesAndTypesList virtual_columns_,
-        bool is_path_with_globs_,
         ReadFromFormatInfo info_,
         bool need_only_count_,
         std::shared_ptr<StorageHDFS> storage_,
@@ -865,10 +860,6 @@ public:
         size_t max_block_size_,
         size_t num_streams_)
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
-        , uris(std::move(uris_))
-        , distributed_processing(distributed_processing_)
-        , virtual_columns(std::move(virtual_columns_))
-        , is_path_with_globs(is_path_with_globs_)
         , info(std::move(info_))
         , need_only_count(need_only_count_)
         , storage(std::move(storage_))
@@ -879,10 +870,6 @@ public:
     }
 
 private:
-    std::vector<String> uris;
-    const bool distributed_processing;
-    NamesAndTypesList virtual_columns;
-    bool is_path_with_globs;
     ReadFromFormatInfo info;
     const bool need_only_count;
     std::shared_ptr<StorageHDFS> storage;
@@ -924,10 +911,6 @@ void StorageHDFS::read(
 
     auto reading = std::make_unique<ReadFromHDFS>(
         read_from_format_info.source_header,
-        uris,
-        distributed_processing,
-        virtual_columns,
-        is_path_with_globs,
         std::move(read_from_format_info),
         need_only_count,
         std::move(this_ptr),
@@ -943,17 +926,17 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
     if (iterator_wrapper)
         return;
 
-    if (distributed_processing)
+    if (storage->distributed_processing)
     {
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>(
             [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo {
                 return StorageHDFS::PathWithInfo{callback(), std::nullopt};
         });
     }
-    else if (is_path_with_globs)
+    else if (storage->is_path_with_globs)
     {
         /// Iterate through disclosed globs and make a source for each file
-        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uris[0], predicate, virtual_columns, context);
+        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(storage->uris[0], predicate, storage->virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([glob_iterator]()
         {
             return glob_iterator->next();
@@ -961,7 +944,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
     }
     else
     {
-        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(uris, predicate, virtual_columns, context);
+        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(storage->uris, predicate, storage->virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([uris_iterator]()
         {
             return uris_iterator->next();
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index 9d9a857bf4e..f1f0019d3e0 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -94,6 +94,7 @@ public:
 
 protected:
     friend class HDFSSource;
+    friend class ReadFromHDFS;
 
 private:
     std::vector<String> uris;
@@ -144,7 +145,6 @@ public:
         UInt64 max_block_size_,
         std::shared_ptr<IteratorWrapper> file_iterator_,
         bool need_only_count_);
-        //const SelectQueryInfo & query_info_);
 
     String getName() const override;
 
@@ -163,7 +163,6 @@ private:
     ColumnsDescription columns_description;
     bool need_only_count;
     size_t total_rows_in_file = 0;
-    //SelectQueryInfo query_info;
 
     std::unique_ptr<ReadBuffer> read_buf;
     std::shared_ptr<IInputFormat> input_format;
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 3389ed1db86..36219d13a45 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -896,15 +896,10 @@ public:
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
         , storage(std::move(storage_))
         , uri_options(uri_options_)
-        // , paths(std::move(paths_))
-        // , archive_info(std::move(archive_info_))
-        // , virtual_columns(std::move(virtual_columns_))
-        // , distributed_processing(distributed_processing_)
         , info(std::move(info_))
         , need_only_count(need_only_count_)
         , read_uri_params(std::move(read_uri_params_))
         , read_post_data_callback(std::move(read_post_data_callback_))
-        // , total_bytes_to_read(total_bytes_to_read_)
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -915,19 +910,11 @@ private:
     std::shared_ptr<IStorageURLBase> storage;
     std::vector<String> * uri_options;
 
-    // std::vector<std::string> paths;
-    // std::optional<StorageFile::ArchiveInfo> archive_info;
-
-    // NamesAndTypesList virtual_columns;
-    // const bool distributed_processing;
-
     ReadFromFormatInfo info;
     const bool need_only_count;
     std::vector<std::pair<std::string, std::string>> read_uri_params;
     std::function<void(std::ostream &)> read_post_data_callback;
 
-    // size_t total_bytes_to_read;
-
     ContextPtr context;
 
     size_t max_block_size;
@@ -937,9 +924,6 @@ private:
     bool is_url_with_globs = false;
     bool is_empty_glob = false;
 
-    // FieldVectorPtr keys;
-    // bool all_scan = false;
-
     void createIterator(const ActionsDAG::Node * predicate);
 };
 

From 488e36de469d9e93a2d3de6c2c61626d7e45b780 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Tue, 2 Jan 2024 20:20:39 +0000
Subject: [PATCH 101/204] More fixes

---
 .../MergeTree/MergeTreeReaderCompact.cpp      | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
index 883f17ab215..02048009296 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
@@ -339,9 +339,15 @@ void MergeTreeReaderCompact::readData(
         ColumnPtr temp_column;
 
         auto it = columns_cache_for_subcolumns.find(name_type_in_storage.name);
-        if (!name_level_for_offsets.has_value() && it != columns_cache_for_subcolumns.end())
+        if (!column_for_offsets && it != columns_cache_for_subcolumns.end())
         {
             temp_column = it->second;
+            auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column);
+            if (column->empty())
+                column = IColumn::mutate(subcolumn);
+            else
+                column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
+
             columns_cache_was_used = true;
         }
         else
@@ -362,17 +368,17 @@ void MergeTreeReaderCompact::readData(
             serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
             serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr);
 
-            if (!name_level_for_offsets.has_value())
+            if (!column_for_offsets)
                 columns_cache_for_subcolumns[name_type_in_storage.name] = temp_column;
+
+            auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column);
+
+            /// TODO: Avoid extra copying.
+            if (column->empty())
+                column = subcolumn;
+            else
+                column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
         }
-
-        auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column);
-
-        /// TODO: Avoid extra copying.
-        if (column->empty())
-            column = subcolumn;
-        else
-            column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
     }
     else
     {

From 2da0a306269f94ce208c1ac20c3f98e7da345e89 Mon Sep 17 00:00:00 2001
From: Bharat Nallan Chakravarthy <bharatnc@gmail.com>
Date: Mon, 1 Jan 2024 21:32:00 -0800
Subject: [PATCH 102/204] add a test

---
 .../test_wrong_db_or_table_name/test.py       | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/integration/test_wrong_db_or_table_name/test.py b/tests/integration/test_wrong_db_or_table_name/test.py
index 641501eac84..a5096d80ca9 100644
--- a/tests/integration/test_wrong_db_or_table_name/test.py
+++ b/tests/integration/test_wrong_db_or_table_name/test.py
@@ -57,6 +57,31 @@ def test_drop_wrong_database_name(start):
     node.query("DROP DATABASE test;")
 
 
+def test_database_engine_name(start):
+    # test with a valid database engine
+    node.query(
+        """
+        CREATE DATABASE test_atomic ENGINE = Atomic;
+        CREATE TABLE test_atomic.table_test_atomic (i Int64) ENGINE = MergeTree() ORDER BY i;
+        INSERT INTO test_atomic.table_test_atomic SELECT 1;
+        """
+    )
+    assert 1 == int(node.query("SELECT * FROM test_atomic.table_test_atomic".strip()))
+    # test with an invalid database engine
+    with pytest.raises(
+        QueryRuntimeException,
+        match="DB::Exception: Unknown database engine Atomic123. Maybe you meant: \\['Atomic'\\].",
+    ):
+        node.query("CREATE DATABASE test_atomic123 ENGINE = Atomic123;")
+
+    node.query(
+        """
+        DROP TABLE test_atomic.table_test_atomic;
+        DROP DATABASE test_atomic;
+       """
+    )
+
+
 def test_wrong_table_name(start):
     node.query(
         """

From 7c5e329cb7ad8f34cea1c3affde8dcabffde8519 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 2 Jan 2024 23:27:25 +0100
Subject: [PATCH 103/204] Check what will happen if we only indent named tuples

---
 src/DataTypes/DataTypeMap.cpp   |  5 +----
 src/DataTypes/DataTypeTuple.cpp | 35 ++++++++++++++++++++++++---------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp
index acd26ca338b..1f246af74d3 100644
--- a/src/DataTypes/DataTypeMap.cpp
+++ b/src/DataTypes/DataTypeMap.cpp
@@ -85,10 +85,7 @@ std::string DataTypeMap::doGetName() const
 std::string DataTypeMap::doGetPrettyName(size_t indent) const
 {
     WriteBufferFromOwnString s;
-    s << "Map(\n"
-      << fourSpaceIndent(indent + 1) << key_type->getPrettyName(indent + 1) << ",\n"
-      << fourSpaceIndent(indent + 1) << value_type->getPrettyName(indent + 1) << '\n'
-      << fourSpaceIndent(indent) << ')';
+    s << "Map(" << key_type->getPrettyName(indent) << ", " << value_type->getPrettyName(indent) << ')';
     return s.str();
 }
 
diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp
index fd2e5e6a784..a26f0df630e 100644
--- a/src/DataTypes/DataTypeTuple.cpp
+++ b/src/DataTypes/DataTypeTuple.cpp
@@ -98,21 +98,38 @@ std::string DataTypeTuple::doGetPrettyName(size_t indent) const
 {
     size_t size = elems.size();
     WriteBufferFromOwnString s;
-    s << "Tuple(\n";
 
-    for (size_t i = 0; i != size; ++i)
+    /// If the Tuple is named, we will output it in multiple lines with indentation.
+    if (have_explicit_names)
     {
-        if (i != 0)
-            s << ",\n";
+        s << "Tuple(\n";
 
-        s << fourSpaceIndent(indent + 1);
-        if (have_explicit_names)
-            s << backQuoteIfNeed(names[i]) << ' ';
+        for (size_t i = 0; i != size; ++i)
+        {
+            if (i != 0)
+                s << ",\n";
 
-        s << elems[i]->getPrettyName(indent + 1);
+            s << fourSpaceIndent(indent + 1)
+                << backQuoteIfNeed(names[i]) << ' '
+                << elems[i]->getPrettyName(indent + 1);
+        }
+
+        s << ')';
+    }
+    else
+    {
+        s << "Tuple("; /// Unnamed tuples are printed on a single line, so no line break after the parenthesis.
+
+        for (size_t i = 0; i != size; ++i)
+        {
+            if (i != 0)
+                s << ", ";
+            s << elems[i]->getPrettyName(indent);
+        }
+
+        s << ')';
     }
 
-    s << '\n' << fourSpaceIndent(indent) << ')';
     return s.str();
 }
 

From 30876b159afb21fc6a251c65b92d9ae793b59594 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Tue, 2 Jan 2024 20:56:19 +0100
Subject: [PATCH 104/204] fix

---
 .../0_stateless/01175_distributed_ddl_output_mode_long.sh    | 2 +-
 .../0_stateless/02447_drop_database_replica.reference        | 4 ----
 tests/queries/0_stateless/02447_drop_database_replica.sh     | 5 ++---
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh
index d2695e602c5..12e142adda9 100755
--- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh
+++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh
@@ -33,7 +33,7 @@ function run_until_out_contains()
     done
 }
 
-RAND_COMMENT="01175_DDL_$RANDOM"
+RAND_COMMENT="01175_DDL_$CLICKHOUSE_DATABASE"
 LOG_COMMENT="${CLICKHOUSE_LOG_COMMENT}_$RAND_COMMENT"
 
 CLICKHOUSE_CLIENT_WITH_SETTINGS=${CLICKHOUSE_CLIENT/--log_comment ${CLICKHOUSE_LOG_COMMENT}/--log_comment ${LOG_COMMENT}}
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference
index 7be5dde1998..1af3ee244f1 100644
--- a/tests/queries/0_stateless/02447_drop_database_replica.reference
+++ b/tests/queries/0_stateless/02447_drop_database_replica.reference
@@ -12,9 +12,6 @@ t
 2
 rdb_default	1	1	s1	r1	1
 2
-s1	r1	OK	2	0
-s1	r2	QUEUED	2	0
-s2	r1	QUEUED	2	0
 2
 s1	r1	OK	2	0
 s1	r2	QUEUED	2	0
@@ -27,5 +24,4 @@ rdb_default	1	2	s1	r2	0
 t
 t2
 t3
-t4
 rdb_default_4	1	1	s1	r1	1
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh
index d12f173f388..fb89db5045b 100755
--- a/tests/queries/0_stateless/02447_drop_database_replica.sh
+++ b/tests/queries/0_stateless/02447_drop_database_replica.sh
@@ -33,9 +33,8 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na
 $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
 
 # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas
-timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>/dev/null | sort
-timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
-timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort
+timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
+timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log" | sort
 
 $CLICKHOUSE_CLIENT -q "detach database $db3"
 $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"

From 3502245ecb6532f36355aeac3214ddb9c2973c3e Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 3 Jan 2024 04:32:33 +0000
Subject: [PATCH 105/204] use fs::absolute instead of fs::canonical

To allow users to work with symlinks in user_files_path

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Storages/StorageFile.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 25bb6691ff6..b496b5a5e2e 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -115,7 +115,9 @@ void listFilesWithRegexpMatchingImpl(
     {
         try
         {
-            fs::path path = fs::canonical(path_for_ls + for_match);
+            /// Do not use fs::canonical or fs::weakly_canonical.
+            /// Otherwise it will not be possible to work with symlinks in the `user_files_path` directory.
+            fs::path path = fs::absolute(path_for_ls + for_match);
             result.push_back(path.string());
         }
         catch (const std::exception &) // NOLINT

From 35325e8e830be18e3f010ea305099c7196b0fb93 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 3 Jan 2024 04:57:41 +0000
Subject: [PATCH 106/204] add a test

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 .../0_stateless/02051_symlinks_to_user_files.reference        | 1 +
 tests/queries/0_stateless/02051_symlinks_to_user_files.sh     | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.reference b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference
index d86bac9de59..2c94e483710 100644
--- a/tests/queries/0_stateless/02051_symlinks_to_user_files.reference
+++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference
@@ -1 +1,2 @@
 OK
+OK
diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh
index 0af71e4deee..a88c0ddd5e9 100755
--- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh
+++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh
@@ -8,13 +8,14 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # See 01658_read_file_to_string_column.sh
 user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 
-FILE_PATH="${user_files_path}/file/"
+FILE_PATH="${user_files_path}/file"
 mkdir -p ${FILE_PATH}
 chmod 777 ${FILE_PATH}
 
 FILE="test_symlink_${CLICKHOUSE_DATABASE}"
 
 symlink_path=${FILE_PATH}/${FILE}
+symlink_path_with_regex=${FILE_PATH}*/${FILE}
 file_path=$CUR_DIR/${FILE}
 
 touch ${file_path}
@@ -29,3 +30,4 @@ trap cleanup EXIT
 
 ${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'";
 ${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String') order by a";
+${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path_with_regex}', 'Values', 'a String') order by a";

From 2186aa8f217cc83374f5db6bdab089662ba8085a Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 3 Jan 2024 08:03:01 +0000
Subject: [PATCH 107/204] Revert "Revert "Implement punycode
 encoding/decoding""

This reverts commit 345d29a3c1782254ad05d7ab9352a570386579ef.
---
 .gitmodules                                   |   3 +
 contrib/CMakeLists.txt                        |   1 +
 contrib/idna                                  |   1 +
 contrib/idna-cmake/CMakeLists.txt             |  24 +++
 .../functions/string-functions.md             |  65 +++++++
 src/Common/config.h.in                        |   1 +
 src/Functions/CMakeLists.txt                  |   4 +
 src/Functions/FunctionSqid.cpp                |   2 +-
 src/Functions/punycode.cpp                    | 165 ++++++++++++++++++
 src/configure_config.cmake                    |   3 +
 .../0_stateless/02932_punycode.reference      |  35 ++++
 tests/queries/0_stateless/02932_punycode.sql  |  63 +++++++
 .../aspell-ignore/en/aspell-dict.txt          |   3 +
 13 files changed, 369 insertions(+), 1 deletion(-)
 create mode 160000 contrib/idna
 create mode 100644 contrib/idna-cmake/CMakeLists.txt
 create mode 100644 src/Functions/punycode.cpp
 create mode 100644 tests/queries/0_stateless/02932_punycode.reference
 create mode 100644 tests/queries/0_stateless/02932_punycode.sql

diff --git a/.gitmodules b/.gitmodules
index 53ef899dd99..3b9faea3cc1 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -360,3 +360,6 @@
 [submodule "contrib/sqids-cpp"]
 	path = contrib/sqids-cpp
 	url = https://github.com/sqids/sqids-cpp.git
+[submodule "contrib/idna"]
+	path = contrib/idna
+	url = https://github.com/ada-url/idna.git
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 1b5ba15187f..02cb19d4c07 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx)
 add_contrib (libpq-cmake libpq)
 add_contrib (nuraft-cmake NuRaft)
 add_contrib (fast_float-cmake fast_float)
+add_contrib (idna-cmake idna)
 add_contrib (datasketches-cpp-cmake datasketches-cpp)
 add_contrib (incbin-cmake incbin)
 add_contrib (sqids-cpp-cmake sqids-cpp)
diff --git a/contrib/idna b/contrib/idna
new file mode 160000
index 00000000000..3c8be01d42b
--- /dev/null
+++ b/contrib/idna
@@ -0,0 +1 @@
+Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667
diff --git a/contrib/idna-cmake/CMakeLists.txt b/contrib/idna-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..1138b836192
--- /dev/null
+++ b/contrib/idna-cmake/CMakeLists.txt
@@ -0,0 +1,24 @@
+option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES})
+if ((NOT ENABLE_IDNA))
+    message (STATUS "Not using idna")
+    return()
+endif()
+set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna")
+
+set (SRCS
+    "${LIBRARY_DIR}/src/idna.cpp"
+    "${LIBRARY_DIR}/src/mapping.cpp"
+    "${LIBRARY_DIR}/src/mapping_tables.cpp"
+    "${LIBRARY_DIR}/src/normalization.cpp"
+    "${LIBRARY_DIR}/src/normalization_tables.cpp"
+    "${LIBRARY_DIR}/src/punycode.cpp"
+    "${LIBRARY_DIR}/src/to_ascii.cpp"
+    "${LIBRARY_DIR}/src/to_unicode.cpp"
+    "${LIBRARY_DIR}/src/unicode_transcoding.cpp"
+    "${LIBRARY_DIR}/src/validity.cpp"
+)
+
+add_library (_idna ${SRCS})
+target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include")
+
+add_library (ch_contrib::idna ALIAS _idna)
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 20694211912..5bad7d53e62 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -1383,6 +1383,71 @@ Result:
 └──────────────────┘
 ```
 
+## punycodeEncode
+
+Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string.
+The string must be UTF8-encoded, otherwise results are undefined.
+
+**Syntax**
+
+``` sql
+punycodeEncode(val)
+```
+
+**Arguments**
+
+- `val` - Input value. [String](../data-types/string.md)
+
+**Returned value**
+
+- A Punycode representation of the input value. [String](../data-types/string.md)
+
+**Example**
+
+``` sql
+select punycodeEncode('München');
+```
+
+Result:
+
+```result
+┌─punycodeEncode('München')─┐
+│ Mnchen-3ya                │
+└───────────────────────────┘
+```
+
+## punycodeDecode
+
+Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
+
+**Syntax**
+
+``` sql
+punycodeDecode(val)
+```
+
+**Arguments**
+
+- `val` - Punycode-encoded string. [String](../data-types/string.md)
+
+**Returned value**
+
+- The plaintext of the input value. [String](../data-types/string.md)
+
+**Example**
+
+``` sql
+select punycodeDecode('Mnchen-3ya');
+```
+
+Result:
+
+```result
+┌─punycodeDecode('Mnchen-3ya')─┐
+│ München                      │
+└──────────────────────────────┘
+```
+
 ## byteHammingDistance
 
 Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
diff --git a/src/Common/config.h.in b/src/Common/config.h.in
index f84e28942c5..5b3388a3b7d 100644
--- a/src/Common/config.h.in
+++ b/src/Common/config.h.in
@@ -28,6 +28,7 @@
 #cmakedefine01 USE_S2_GEOMETRY
 #cmakedefine01 USE_FASTOPS
 #cmakedefine01 USE_SQIDS
+#cmakedefine01 USE_IDNA
 #cmakedefine01 USE_NLP
 #cmakedefine01 USE_VECTORSCAN
 #cmakedefine01 USE_LIBURING
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index 89676594581..a06e898b7c5 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids)
     list (APPEND PRIVATE_LIBS ch_contrib::sqids)
 endif()
 
+if (TARGET ch_contrib::idna)
+    list (APPEND PRIVATE_LIBS ch_contrib::idna)
+endif()
+
 if (TARGET ch_contrib::h3)
     list (APPEND PRIVATE_LIBS ch_contrib::h3)
 endif()
diff --git a/src/Functions/FunctionSqid.cpp b/src/Functions/FunctionSqid.cpp
index 546263914c2..4517bba963e 100644
--- a/src/Functions/FunctionSqid.cpp
+++ b/src/Functions/FunctionSqid.cpp
@@ -1,6 +1,6 @@
 #include "config.h"
 
-#ifdef ENABLE_SQIDS
+#if USE_SQIDS
 
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnsNumber.h>
diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp
new file mode 100644
index 00000000000..c11409f0d1a
--- /dev/null
+++ b/src/Functions/punycode.cpp
@@ -0,0 +1,165 @@
+#include "config.h"
+
+#if USE_IDNA
+
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringToString.h>
+
+#ifdef __clang__
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wnewline-eof"
+#endif
+#    include <ada/idna/punycode.h>
+#    include <ada/idna/unicode_transcoding.h>
+#ifdef __clang__
+#    pragma clang diagnostic pop
+#endif
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+    extern const int ILLEGAL_COLUMN;
+}
+
+struct PunycodeEncodeImpl
+{
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        const size_t rows = offsets.size();
+        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
+        res_offsets.reserve(rows);
+
+        size_t prev_offset = 0;
+        std::u32string value_utf32;
+        std::string value_puny;
+        for (size_t row = 0; row < rows; ++row)
+        {
+            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
+            const size_t value_length = offsets[row] - prev_offset - 1;
+
+            const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length);
+            value_utf32.resize(value_utf32_length);
+            ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
+
+            const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
+            if (!ok)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode encoding");
+
+            res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
+            res_offsets.push_back(res_data.size());
+
+            prev_offset = offsets[row];
+
+            value_utf32.clear();
+            value_puny.clear(); /// utf32_to_punycode() appends to its output string
+        }
+    }
+
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
+    {
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeEncode function");
+    }
+};
+
+struct PunycodeDecodeImpl
+{
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        const size_t rows = offsets.size();
+        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
+        res_offsets.reserve(rows);
+
+        size_t prev_offset = 0;
+        std::u32string value_utf32;
+        std::string value_utf8;
+        for (size_t row = 0; row < rows; ++row)
+        {
+            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
+            const size_t value_length = offsets[row] - prev_offset - 1;
+
+            const std::string_view value_punycode(value, value_length);
+            const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32);
+            if (!ok)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode decoding");
+
+            const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size());
+            value_utf8.resize(utf8_length);
+            ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data());
+
+            res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1);
+            res_offsets.push_back(res_data.size());
+
+            prev_offset = offsets[row];
+
+            value_utf32.clear(); /// punycode_to_utf32() appends to its output string
+            value_utf8.clear();
+        }
+    }
+
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
+    {
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeDecode function");
+    }
+};
+
+struct NamePunycodeEncode
+{
+    static constexpr auto name = "punycodeEncode";
+};
+
+struct NamePunycodeDecode
+{
+    static constexpr auto name = "punycodeDecode";
+};
+
+REGISTER_FUNCTION(Punycode)
+{
+    factory.registerFunction<FunctionStringToString<PunycodeEncodeImpl, NamePunycodeEncode>>(FunctionDocumentation{
+        .description=R"(
+Computes a Punycode representation of a string.)",
+        .syntax="punycodeEncode(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT punycodeEncode('München') AS puny;",
+            R"(
+┌─puny───────┐
+│ Mnchen-3ya │
+└────────────┘
+            )"
+            }}
+    });
+
+    factory.registerFunction<FunctionStringToString<PunycodeDecodeImpl, NamePunycodeDecode>>(FunctionDocumentation{
+        .description=R"(
+Computes a Punycode representation of a string.)",
+        .syntax="punycodeDecode(str)",
+        .arguments={{"str", "A Punycode-encoded string"}},
+        .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT punycodeDecode('Mnchen-3ya') AS plain;",
+            R"(
+┌─plain───┐
+│ München │
+└─────────┘
+            )"
+            }}
+    });
+}
+
+}
+
+#endif
diff --git a/src/configure_config.cmake b/src/configure_config.cmake
index 9358abdf7f8..7de2d5a9fdd 100644
--- a/src/configure_config.cmake
+++ b/src/configure_config.cmake
@@ -129,6 +129,9 @@ endif()
 if (TARGET ch_contrib::sqids)
     set(USE_SQIDS 1)
 endif()
+if (TARGET ch_contrib::idna)
+    set(USE_IDNA 1)
+endif()
 if (TARGET ch_contrib::vectorscan)
     set(USE_VECTORSCAN 1)
 endif()
diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference
new file mode 100644
index 00000000000..7a39a221e08
--- /dev/null
+++ b/tests/queries/0_stateless/02932_punycode.reference
@@ -0,0 +1,35 @@
+-- Negative tests
+-- Regular cases
+a	a-	a
+A	A-	A
+--	---	--
+London	London-	London
+Lloyd-Atkinson	Lloyd-Atkinson-	Lloyd-Atkinson
+This has spaces	This has spaces-	This has spaces
+-> $1.00 <-	-> $1.00 <--	-> $1.00 <-
+а	80a	а
+ü	tda	ü
+α	mxa	α
+例	fsq	例
+😉	n28h	😉
+αβγ	mxacd	αβγ
+München	Mnchen-3ya	München
+Mnchen-3ya	Mnchen-3ya-	Mnchen-3ya
+München-Ost	Mnchen-Ost-9db	München-Ost
+Bahnhof München-Ost	Bahnhof Mnchen-Ost-u6b	Bahnhof München-Ost
+abæcdöef	abcdef-qua4k	abæcdöef
+правда	80aafi6cg	правда
+ยจฆฟคฏข	22cdfh1b8fsa	ยจฆฟคฏข
+ドメイン名例	eckwd4c7cu47r2wf	ドメイン名例
+MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前
+「bücher」	bcher-kva8445foa	「bücher」
+团淄	3bs854c	团淄
+-- Special cases
+
+
+\N
+\N
+Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.	Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. 
Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa	Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+München	Mnchen-3ya	München
+abc	abc-	abc
+aäoöuü	aou-qla5gqb	aäoöuü
diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql
new file mode 100644
index 00000000000..dd18a43ecc9
--- /dev/null
+++ b/tests/queries/0_stateless/02932_punycode.sql
@@ -0,0 +1,63 @@
+-- Tags: no-fasttest
+-- no-fasttest: requires idna library
+
+SELECT '-- Negative tests';
+
+SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN }
+SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN }
+
+SELECT '-- Regular cases';
+
+-- The test cases originate from the ada idna unit tests:
+--- https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
+
+SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+
+SELECT '-- Special cases';
+
+SELECT punycodeDecode('');
+SELECT punycodeEncode('');
+SELECT punycodeDecode(NULL);
+SELECT punycodeEncode(NULL);
+
+-- garbage Punycode-encoded values
+SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS }
+
+-- long input
+SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+
+-- non-const values
+DROP TABLE IF EXISTS tab;
+CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str;
+INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München');
+SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original FROM tab;
+DROP TABLE tab;
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index d68330771e5..f339f451878 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -714,6 +714,7 @@ Promtail
 Protobuf
 ProtobufSingle
 ProxySQL
+Punycode
 PyArrow
 PyCharm
 QEMU
@@ -2075,6 +2076,8 @@ pseudorandom
 pseudorandomize
 psql
 ptrs
+punycodeDecode
+punycodeEncode
 pushdown
 pwrite
 py

From 45613f1d14841756a15219c7ecb81aadd59496b9 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 3 Jan 2024 09:18:25 +0000
Subject: [PATCH 108/204] Refactor punycodeEncode/Decode() functions to inherit
 from IFunction directly

---
 src/Functions/punycode.cpp                   | 123 +++++++++++++++----
 tests/queries/0_stateless/02932_punycode.sql |   4 +-
 2 files changed, 101 insertions(+), 26 deletions(-)

diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp
index c11409f0d1a..fb89759b24d 100644
--- a/src/Functions/punycode.cpp
+++ b/src/Functions/punycode.cpp
@@ -2,8 +2,11 @@
 
 #if USE_IDNA
 
+#include <Columns/ColumnString.h>
+#include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionStringToString.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunction.h>
 
 #ifdef __clang__
 #    pragma clang diagnostic push
@@ -24,8 +27,57 @@ namespace ErrorCodes
     extern const int ILLEGAL_COLUMN;
 }
 
-struct PunycodeEncodeImpl
+namespace
 {
+
+enum class ExceptionMode
+{
+    Throw,
+    Null
+};
+
+template <ExceptionMode exception_mode>
+class FunctionPunycodeEncode : public IFunction
+{
+public:
+    static constexpr auto name = (exception_mode == ExceptionMode::Null) ? "punycodeEncodeOrNull" : "punycodeEncode";
+
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionPunycodeEncode>(); }
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        FunctionArgumentDescriptors args{
+            {"str", &isString<IDataType>, nullptr, "String"},
+        };
+        validateFunctionArgumentTypes(*this, arguments, args);
+
+        auto return_type = std::make_shared<DataTypeString>();
+
+        if constexpr (exception_mode == ExceptionMode::Null)
+            return makeNullable(return_type);
+        else
+            return return_type;
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        const ColumnPtr column = arguments[0].column;
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
+        {
+            auto col_res = ColumnString::create();
+            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
+            return col_res;
+        }
+        else
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
+                arguments[0].column->getName(), getName());
+    }
+
+private:
     static void vector(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
@@ -61,15 +113,50 @@ struct PunycodeEncodeImpl
             value_puny.clear(); /// utf32_to_punycode() appends to its output string
         }
     }
-
-    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
-    {
-        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeEncode function");
-    }
 };
 
-struct PunycodeDecodeImpl
+template <ExceptionMode exception_mode>
+class FunctionPunycodeDecode : public IFunction
 {
+public:
+    static constexpr auto name = (exception_mode == ExceptionMode::Null) ? "punycodeDecodeOrNull" : "punycodeDecode";
+
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionPunycodeDecode>(); }
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        FunctionArgumentDescriptors args{
+            {"str", &isString<IDataType>, nullptr, "String"},
+        };
+        validateFunctionArgumentTypes(*this, arguments, args);
+
+        auto return_type = std::make_shared<DataTypeString>();
+
+        if constexpr (exception_mode == ExceptionMode::Null)
+            return makeNullable(return_type);
+        else
+            return return_type;
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        const ColumnPtr column = arguments[0].column;
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
+        {
+            auto col_res = ColumnString::create();
+            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
+            return col_res;
+        }
+        else
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
+                arguments[0].column->getName(), getName());
+    }
+
+private:
     static void vector(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
@@ -106,26 +193,13 @@ struct PunycodeDecodeImpl
             value_utf8.clear();
         }
     }
-
-    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
-    {
-        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeDecode function");
-    }
 };
 
-struct NamePunycodeEncode
-{
-    static constexpr auto name = "punycodeEncode";
-};
-
-struct NamePunycodeDecode
-{
-    static constexpr auto name = "punycodeDecode";
-};
+}
 
 REGISTER_FUNCTION(Punycode)
 {
-    factory.registerFunction<FunctionStringToString<PunycodeEncodeImpl, NamePunycodeEncode>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycodeEncode<ExceptionMode::Throw>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string.)",
         .syntax="punycodeEncode(str)",
@@ -142,7 +216,7 @@ Computes a Punycode representation of a string.)",
             }}
     });
 
-    factory.registerFunction<FunctionStringToString<PunycodeDecodeImpl, NamePunycodeDecode>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycodeDecode<ExceptionMode::Throw>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string.)",
         .syntax="punycodeDecode(str)",
@@ -158,6 +232,7 @@ Computes a Punycode representation of a string.)",
             )"
             }}
     });
+
 }
 
 }
diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql
index dd18a43ecc9..afc3c7e8712 100644
--- a/tests/queries/0_stateless/02932_punycode.sql
+++ b/tests/queries/0_stateless/02932_punycode.sql
@@ -9,8 +9,8 @@ SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN }
-SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN }
+SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
 SELECT '-- Regular cases';
 

From 12fda5f3095edc9aae987391194c97482777f560 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 3 Jan 2024 09:22:29 +0000
Subject: [PATCH 109/204] fix 02771_multidirectory_globs_storage_file

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Storages/StorageFile.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index b496b5a5e2e..b3c686c290d 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -115,10 +115,12 @@ void listFilesWithRegexpMatchingImpl(
     {
         try
         {
-            /// Do not use fs::canonical or fs::weakly_canonical.
+            /// We use fs::canonical to check if the file exists but the result path
+            /// will be fs::absolute.
             /// Otherwise it will not allow to work with symlinks in `user_files_path` directory.
-            fs::path path = fs::absolute(path_for_ls + for_match);
-            result.push_back(path.string());
+            fs::path canonical_path = fs::canonical(path_for_ls + for_match);
+            fs::path absolute_path = fs::absolute(path_for_ls + for_match);
+            result.push_back(absolute_path.string());
         }
         catch (const std::exception &) // NOLINT
         {

From eeed23b1bc2b789c0d3097595540b16ef7e788b0 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 09:45:25 +0000
Subject: [PATCH 110/204] Fix sanitizer assert.

---
 src/Storages/StorageURL.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 36219d13a45..9ace7775d4b 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -1035,12 +1035,12 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate)
     }
     else
     {
-        iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([&, max_addresses, done = false]() mutable
+        iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([max_addresses, done = false, &uri = storage->uri]() mutable
         {
             if (done)
                 return StorageURLSource::FailoverOptions{};
             done = true;
-            return getFailoverOptions(storage->uri, max_addresses);
+            return getFailoverOptions(uri, max_addresses);
         });
         num_streams = 1;
     }

From 78776a060c305f76fc667be26177309cf5ed566d Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 11:06:57 +0100
Subject: [PATCH 111/204] Update src/Storages/StorageMerge.cpp

Co-authored-by: Dmitry Novik <n0vik@clickhouse.com>
---
 src/Storages/StorageMerge.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index fb208b64c78..b3cef83ccdd 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -51,7 +51,7 @@
 #include <Common/assert_cast.h>
 #include <Common/checkStackSize.h>
 #include <Common/typeid_cast.h>
-#include "Parsers/queryToString.h"
+#include <Parsers/queryToString.h>
 
 namespace
 {

From d5dcb6661d4c1f5ee7f617de9db9002800b1e07e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 10:12:08 +0000
Subject: [PATCH 112/204] Review fixes.

---
 src/Storages/StorageMerge.cpp | 14 +++++++-------
 src/Storages/StorageMerge.h   |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index b3cef83ccdd..58124973cbf 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -361,7 +361,7 @@ ReadFromMerge::ReadFromMerge(
 
 void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    filterTablesAndCreateChildPlans();
+    filterTablesAndCreateChildrenPlans();
 
     if (selected_tables.empty())
     {
@@ -429,7 +429,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
     pipeline.addResources(std::move(resources));
 }
 
-void ReadFromMerge::filterTablesAndCreateChildPlans()
+void ReadFromMerge::filterTablesAndCreateChildrenPlans()
 {
     if (child_plans)
         return;
@@ -451,10 +451,10 @@ void ReadFromMerge::filterTablesAndCreateChildPlans()
 
     selected_tables = getSelectedTables(context, has_database_virtual_column, has_table_virtual_column);
 
-    child_plans = createChildPlans(query_info);
+    child_plans = createChildrenPlans(query_info);
 }
 
-std::vector<ReadFromMerge::ChildPlan> ReadFromMerge::createChildPlans(SelectQueryInfo & query_info_) const
+std::vector<ReadFromMerge::ChildPlan> ReadFromMerge::createChildrenPlans(SelectQueryInfo & query_info_) const
 {
     if (selected_tables.empty())
         return {};
@@ -1162,13 +1162,13 @@ void ReadFromMerge::convertAndFilterSourceStream(
 
 const ReadFromMerge::StorageListWithLocks & ReadFromMerge::getSelectedTables()
 {
-    filterTablesAndCreateChildPlans();
+    filterTablesAndCreateChildrenPlans();
     return selected_tables;
 }
 
 bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_)
 {
-    filterTablesAndCreateChildPlans();
+    filterTablesAndCreateChildrenPlans();
 
     /// Disable read-in-order optimization for reverse order with final.
     /// Otherwise, it can lead to incorrect final behavior because the implementation may rely on the reading in direct order).
@@ -1211,7 +1211,7 @@ void ReadFromMerge::applyFilters(const QueryPlan & plan) const
 
 void ReadFromMerge::applyFilters()
 {
-    filterTablesAndCreateChildPlans();
+    filterTablesAndCreateChildrenPlans();
 
     for (const auto & child_plan : *child_plans)
         if (child_plan.plan.isInitialized())
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index 95c373eab3b..703e5db9c50 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -232,9 +232,9 @@ private:
     /// It's needed to guarantee lifetime for child steps to be the same as for this step (mainly for EXPLAIN PIPELINE).
     std::optional<std::vector<ChildPlan>> child_plans;
 
-    std::vector<ChildPlan> createChildPlans(SelectQueryInfo & query_info_) const;
+    std::vector<ChildPlan> createChildrenPlans(SelectQueryInfo & query_info_) const;
 
-    void filterTablesAndCreateChildPlans();
+    void filterTablesAndCreateChildrenPlans();
 
     void applyFilters(const QueryPlan & plan) const;
 

From b5d7ea2b0255e541e2b3dad539e90726cc894bac Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 3 Jan 2024 10:50:00 +0000
Subject: [PATCH 113/204] Introduce OrNull() overloads for
 punycodeEncode/Decode()

---
 .../functions/string-functions.md             |  11 +-
 src/Functions/punycode.cpp                    | 125 ++++++++++++++----
 .../0_stateless/02932_punycode.reference      |  67 ++++++----
 tests/queries/0_stateless/02932_punycode.sql  |  77 +++++++----
 4 files changed, 196 insertions(+), 84 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 5bad7d53e62..eb5cb29c502 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -1386,7 +1386,7 @@ Result:
 ## punycodeEncode
 
 Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string.
-The string must be UTF8-encoded, otherwise results are undefined.
+The string must be UTF8-encoded, otherwise an exception is thrown.
 
 **Syntax**
 
@@ -1416,9 +1416,14 @@ Result:
 └───────────────────────────┘
 ```
 
+## punycodeEncodeOrNull
+
+Like `punycodeEncode` but returns `NULL` in case of an error instead of throwing an exception.
+
 ## punycodeDecode
 
 Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
+If no valid Punycode-encoded string is given, an exception is thrown.
 
 **Syntax**
 
@@ -1448,6 +1453,10 @@ Result:
 └──────────────────────────────┘
 ```
 
+## punycodeDecodeOrNull
+
+Like `punycodeDecode` but returns `NULL` in case of an error instead of throwing an exception.
+
 ## byteHammingDistance
 
 Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp
index fb89759b24d..ad66c723f05 100644
--- a/src/Functions/punycode.cpp
+++ b/src/Functions/punycode.cpp
@@ -2,7 +2,9 @@
 
 #if USE_IDNA
 
+#include <Columns/ColumnNullable.h>
 #include <Columns/ColumnString.h>
+#include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
@@ -30,17 +32,17 @@ namespace ErrorCodes
 namespace
 {
 
-enum class ExceptionMode
+enum class ErrorHandling
 {
     Throw,
     Null
 };
 
-template <ExceptionMode exception_mode>
+template <ErrorHandling error_handling>
 class FunctionPunycodeEncode : public IFunction
 {
 public:
-    static constexpr auto name = (exception_mode == ExceptionMode::Null) ? "punycodeEncodeOrNull" : "punycodeEncode";
+    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "punycodeEncodeOrNull" : "punycodeEncode";
 
     static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionPunycodeEncode>(); }
     String getName() const override { return name; }
@@ -57,7 +59,7 @@ public:
 
         auto return_type = std::make_shared<DataTypeString>();
 
-        if constexpr (exception_mode == ExceptionMode::Null)
+        if constexpr (error_handling == ErrorHandling::Null)
             return makeNullable(return_type);
         else
             return return_type;
@@ -65,16 +67,18 @@ public:
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
-        const ColumnPtr column = arguments[0].column;
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
-        {
-            auto col_res = ColumnString::create();
-            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
-            return col_res;
-        }
+        auto col_res = ColumnString::create();
+        ColumnUInt8::MutablePtr col_res_null;
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
+            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_res_null);
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                 arguments[0].column->getName(), getName());
+
+        if constexpr (error_handling == ErrorHandling::Null)
+            return ColumnNullable::create(std::move(col_res), std::move(col_res_null));
+        else
+            return col_res;
     }
 
 private:
@@ -82,11 +86,14 @@ private:
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
         ColumnString::Chars & res_data,
-        ColumnString::Offsets & res_offsets)
+        ColumnString::Offsets & res_offsets,
+        ColumnUInt8::MutablePtr & col_res_null)
     {
         const size_t rows = offsets.size();
         res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
         res_offsets.reserve(rows);
+        if constexpr (error_handling == ErrorHandling::Null)
+            col_res_null = ColumnUInt8::create(rows, 0);
 
         size_t prev_offset = 0;
         std::u32string value_utf32;
@@ -102,7 +109,17 @@ private:
 
             const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
             if (!ok)
-                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode encoding");
+            {
+                if constexpr (error_handling == ErrorHandling::Throw)
+                {
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to Punycode", std::string_view(value, value_length));
+                }
+                else
+                {
+                    value_puny.clear();
+                    col_res_null->getData()[row] = 1;
+                }
+            }
 
             res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
             res_offsets.push_back(res_data.size());
@@ -115,11 +132,11 @@ private:
     }
 };
 
-template <ExceptionMode exception_mode>
+template <ErrorHandling error_handling>
 class FunctionPunycodeDecode : public IFunction
 {
 public:
-    static constexpr auto name = (exception_mode == ExceptionMode::Null) ? "punycodeDecodeOrNull" : "punycodeDecode";
+    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "punycodeDecodeOrNull" : "punycodeDecode";
 
     static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionPunycodeDecode>(); }
     String getName() const override { return name; }
@@ -136,7 +153,7 @@ public:
 
         auto return_type = std::make_shared<DataTypeString>();
 
-        if constexpr (exception_mode == ExceptionMode::Null)
+        if constexpr (error_handling == ErrorHandling::Null)
             return makeNullable(return_type);
         else
             return return_type;
@@ -144,16 +161,19 @@ public:
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
-        const ColumnPtr column = arguments[0].column;
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
-        {
-            auto col_res = ColumnString::create();
-            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
-            return col_res;
-        }
+        auto col_res = ColumnString::create();
+        ColumnUInt8::MutablePtr col_res_null;
+
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
+            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_res_null);
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                 arguments[0].column->getName(), getName());
+
+        if constexpr (error_handling == ErrorHandling::Null)
+            return ColumnNullable::create(std::move(col_res), std::move(col_res_null));
+        else
+            return col_res;
     }
 
 private:
@@ -161,11 +181,14 @@ private:
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
         ColumnString::Chars & res_data,
-        ColumnString::Offsets & res_offsets)
+        ColumnString::Offsets & res_offsets,
+        ColumnUInt8::MutablePtr & col_res_null)
     {
         const size_t rows = offsets.size();
         res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
         res_offsets.reserve(rows);
+        if constexpr (error_handling == ErrorHandling::Null)
+            col_res_null = ColumnUInt8::create(rows, 0);
 
         size_t prev_offset = 0;
         std::u32string value_utf32;
@@ -178,7 +201,17 @@ private:
             const std::string_view value_punycode(value, value_length);
             const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32);
             if (!ok)
-                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode decoding");
+            {
+                if constexpr (error_handling == ErrorHandling::Throw)
+                {
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' is not a valid Punycode-encoded string", value_punycode);
+                }
+                else
+                {
+                    value_utf32.clear();
+                    col_res_null->getData()[row] = 1;
+                }
+            }
 
             const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size());
             value_utf8.resize(utf8_length);
@@ -199,9 +232,9 @@ private:
 
 REGISTER_FUNCTION(Punycode)
 {
-    factory.registerFunction<FunctionPunycodeEncode<ExceptionMode::Throw>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycodeEncode<ErrorHandling::Throw>>(FunctionDocumentation{
         .description=R"(
-Computes a Punycode representation of a string.)",
+Computes a Punycode representation of a string. Throws an exception in case of error.)",
         .syntax="punycodeEncode(str)",
         .arguments={{"str", "Input string"}},
         .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
@@ -216,9 +249,26 @@ Computes a Punycode representation of a string.)",
             }}
     });
 
-    factory.registerFunction<FunctionPunycodeDecode<ExceptionMode::Throw>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycodeEncode<ErrorHandling::Null>>(FunctionDocumentation{
         .description=R"(
-Computes a Punycode representation of a string.)",
+Computes a Punycode representation of a string. Returns NULL in case of error.)",
+        .syntax="punycodeEncodeOrNull(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT punycodeEncodeOrNull('München') AS puny;",
+            R"(
+┌─puny───────┐
+│ Mnchen-3ya │
+└────────────┘
+            )"
+            }}
+    });
+
+    factory.registerFunction<FunctionPunycodeDecode<ErrorHandling::Throw>>(FunctionDocumentation{
+        .description=R"(
+Computes the plaintext representation of a Punycode-encoded string. Throws an exception in case of error.)",
         .syntax="punycodeDecode(str)",
         .arguments={{"str", "A Punycode-encoded string"}},
         .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
@@ -228,6 +278,23 @@ Computes a Punycode representation of a string.)",
             R"(
 ┌─plain───┐
 │ München │
+└─────────┘
+            )"
+            }}
+    });
+
+    factory.registerFunction<FunctionPunycodeDecode<ErrorHandling::Null>>(FunctionDocumentation{
+        .description=R"(
+Computes the plaintext representation of a Punycode-encoded string. Returns NULL in case of error.)",
+        .syntax="punycodeDecodeOrNull(str)",
+        .arguments={{"str", "A Punycode-encoded string"}},
+        .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT punycodeDecodeOrNull('Mnchen-3ya') AS plain;",
+            R"(
+┌─plain───┐
+│ München │
 └─────────┘
             )"
             }}
diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference
index 7a39a221e08..a722894bce8 100644
--- a/tests/queries/0_stateless/02932_punycode.reference
+++ b/tests/queries/0_stateless/02932_punycode.reference
@@ -1,35 +1,46 @@
 -- Negative tests
 -- Regular cases
-a	a-	a
-A	A-	A
---	---	--
-London	London-	London
-Lloyd-Atkinson	Lloyd-Atkinson-	Lloyd-Atkinson
-This has spaces	This has spaces-	This has spaces
--> $1.00 <-	-> $1.00 <--	-> $1.00 <-
-а	80a	а
-ü	tda	ü
-α	mxa	α
-例	fsq	例
-😉	n28h	😉
-αβγ	mxacd	αβγ
-München	Mnchen-3ya	München
-Mnchen-3ya	Mnchen-3ya-	Mnchen-3ya
-München-Ost	Mnchen-Ost-9db	München-Ost
-Bahnhof München-Ost	Bahnhof Mnchen-Ost-u6b	Bahnhof München-Ost
-abæcdöef	abcdef-qua4k	abæcdöef
-правда	80aafi6cg	правда
-ยจฆฟคฏข	22cdfh1b8fsa	ยจฆฟคฏข
-ドメイン名例	eckwd4c7cu47r2wf	ドメイン名例
-MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前
-「bücher」	bcher-kva8445foa	「bücher」
-团淄	3bs854c	团淄
+a	a-	a	a-	a
+A	A-	A	A-	A
+--	---	--	---	--
+London	London-	London	London-	London
+Lloyd-Atkinson	Lloyd-Atkinson-	Lloyd-Atkinson	Lloyd-Atkinson-	Lloyd-Atkinson
+This has spaces	This has spaces-	This has spaces	This has spaces-	This has spaces
+-> $1.00 <-	-> $1.00 <--	-> $1.00 <-	-> $1.00 <--	-> $1.00 <-
+а	80a	а	80a	а
+ü	tda	ü	tda	ü
+α	mxa	α	mxa	α
+例	fsq	例	fsq	例
+😉	n28h	😉	n28h	😉
+αβγ	mxacd	αβγ	mxacd	αβγ
+München	Mnchen-3ya	München	Mnchen-3ya	München
+Mnchen-3ya	Mnchen-3ya-	Mnchen-3ya	Mnchen-3ya-	Mnchen-3ya
+München-Ost	Mnchen-Ost-9db	München-Ost	Mnchen-Ost-9db	München-Ost
+Bahnhof München-Ost	Bahnhof Mnchen-Ost-u6b	Bahnhof München-Ost	Bahnhof Mnchen-Ost-u6b	Bahnhof München-Ost
+abæcdöef	abcdef-qua4k	abæcdöef	abcdef-qua4k	abæcdöef
+правда	80aafi6cg	правда	80aafi6cg	правда
+ยจฆฟคฏข	22cdfh1b8fsa	ยจฆฟคฏข	22cdfh1b8fsa	ยจฆฟคฏข
+ドメイン名例	eckwd4c7cu47r2wf	ドメイン名例	eckwd4c7cu47r2wf	ドメイン名例
+MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前
+「bücher」	bcher-kva8445foa	「bücher」	bcher-kva8445foa	「bücher」
+团淄	3bs854c	团淄	3bs854c	团淄
 -- Special cases
 
 
+
+
 \N
 \N
-Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.	Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. 
Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa	Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
-München	Mnchen-3ya	München
-abc	abc-	abc
-aäoöuü	aou-qla5gqb	aäoöuü
+\N
+\N
+\N
+Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.	Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. 
Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa	Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.	Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... 
vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa	Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+München	Mnchen-3ya	München	Mnchen-3ya	München
+abc	abc-	abc	abc-	abc
+aäoöuü	aou-qla5gqb	aäoöuü	aou-qla5gqb	aäoöuü
+Also no punycode	\N
+London-	London
+Mnchen-3ya	München
+No punycode	\N
+Rtting-3ya	Rütting
+XYZ no punycode	\N
diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql
index afc3c7e8712..a142848d427 100644
--- a/tests/queries/0_stateless/02932_punycode.sql
+++ b/tests/queries/0_stateless/02932_punycode.sql
@@ -4,60 +4,85 @@
 SELECT '-- Negative tests';
 
 SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeEncodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+
 SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeDecodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeEncodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+
 SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeEncodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+
 SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeDecodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeEncodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
 SELECT '-- Regular cases';
 
 -- The test cases originate from the ada idna unit tests:
 --- https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
 
-SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
-SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
 
 SELECT '-- Special cases';
 
 SELECT punycodeDecode('');
+SELECT punycodeDecodeOrNull('');
 SELECT punycodeEncode('');
+SELECT punycodeEncodeOrNull('');
+
 SELECT punycodeDecode(NULL);
+SELECT punycodeDecodeOrNull(NULL);
 SELECT punycodeEncode(NULL);
+SELECT punycodeEncodeOrNull(NULL);
 
 -- garbage Punycode-encoded values
 SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS }
+SELECT punycodeDecodeOrNull('no punycode');
 
 -- long input
-SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original;
+SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) as punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
 
 -- non-const values
 DROP TABLE IF EXISTS tab;
 CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str;
 INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München');
-SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original FROM tab;
+SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) as punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull FROM tab;
+DROP TABLE tab;
+
+-- non-const values with a few invalid values for testing the OrNull variants
+DROP TABLE IF EXISTS tab;
+CREATE TABLE tab (puny String) ENGINE=MergeTree ORDER BY puny;
+INSERT INTO tab VALUES ('Also no punycode') ('London-') ('Mnchen-3ya') ('No punycode') ('Rtting-3ya') ('XYZ no punycode');
+SELECT puny, punycodeDecode(puny) AS original FROM tab; -- { serverError BAD_ARGUMENTS }
+SELECT puny, punycodeDecodeOrNull(puny) AS original FROM tab;
 DROP TABLE tab;

From 91fc3b3456590ffc9577b080357513e651303b1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 10:54:42 +0000
Subject: [PATCH 114/204] Linter

---
 src/Common/findExtreme.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/findExtreme.cpp b/src/Common/findExtreme.cpp
index e1f1e199d56..032ac75b79b 100644
--- a/src/Common/findExtreme.cpp
+++ b/src/Common/findExtreme.cpp
@@ -20,7 +20,7 @@ struct MaxComparator
 MULTITARGET_FUNCTION_AVX2_SSE42(
     MULTITARGET_FUNCTION_HEADER(template <is_any_native_number T, typename ComparatorClass, bool add_all_elements, bool add_if_cond_zero> static std::optional<T> NO_INLINE),
     findExtremeImpl,
-    MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end)
+    MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end) /// NOLINT
     {
         size_t count = row_end - row_begin;
         ptr += row_begin;

From cb4d571a453e1427ffdbb40d1cfe9bfb975aa611 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 11:14:19 +0000
Subject: [PATCH 115/204] Support ALIAS columns in USING clause

---
 src/Planner/CollectTableExpressionData.cpp    | 55 +++++++++++++++-
 src/Planner/PlannerJoinTree.cpp               | 63 +++++++++++++++++++
 src/Planner/TableExpressionData.h             |  4 +-
 ...5_analyzer_using_functional_args.reference |  1 +
 .../02955_analyzer_using_functional_args.sql  |  1 +
 5 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 5ba318dab6a..492120141a4 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -8,6 +8,8 @@
 #include <Analyzer/QueryNode.h>
 #include <Analyzer/TableNode.h>
 #include <Analyzer/TableFunctionNode.h>
+#include <Analyzer/JoinNode.h>
+#include <Analyzer/ListNode.h>
 
 #include <Planner/PlannerContext.h>
 #include <Planner/PlannerActionsVisitor.h>
@@ -33,6 +35,25 @@ public:
 
     void visitImpl(QueryTreeNodePtr & node)
     {
+        /// Special case for USING clause which contains references to ALIAS columns.
+        /// We can not modify such ColumnNode.
+        if (auto * join_node = node->as<JoinNode>())
+        {
+            if (!join_node->isUsingJoinExpression())
+                return;
+
+            auto & using_list = join_node->getJoinExpression()->as<ListNode&>();
+            for (auto & using_element : using_list)
+            {
+                auto & column_node = using_element->as<ColumnNode&>();
+                auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as<ListNode&>().getNodes();
+
+                visitUsingColumn(columns_from_subtrees[0]);
+                visitUsingColumn(columns_from_subtrees[1]);
+            }
+            return;
+        }
+
         auto * column_node = node->as<ColumnNode>();
         if (!column_node)
             return;
@@ -55,7 +76,13 @@ public:
         if (column_node->hasExpression() && column_source_node_type != QueryTreeNodeType::ARRAY_JOIN)
         {
             /// Replace ALIAS column with expression
-            table_expression_data.addAliasColumnName(column_node->getColumnName());
+            bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName());
+            if (column_already_exists)
+                return;
+
+            auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
+
+            table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier);
             node = column_node->getExpression();
             visitImpl(node);
             return;
@@ -78,13 +105,37 @@ public:
         table_expression_data.addColumn(column_node->getColumn(), column_identifier);
     }
 
-    static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
+    static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr & child_node)
     {
+        if (auto * join_node = parent->as<JoinNode>())
+        {
+            return join_node->getJoinExpression() != child_node || !join_node->isUsingJoinExpression();
+        }
         auto child_node_type = child_node->getNodeType();
         return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
     }
 
 private:
+
+    void visitUsingColumn(QueryTreeNodePtr & node) /// Handles one inner column of a USING pair without replacing the ColumnNode itself.
+    {
+        auto & column_node = node->as<ColumnNode&>();
+        if (column_node.hasExpression()) /// Column carries an expression (the ALIAS-column case this visitor special-cases).
+        {
+            auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_node.getColumnSource());
+            bool column_already_exists = table_expression_data.hasColumn(column_node.getColumnName());
+            if (column_already_exists) /// Already registered for this table expression: nothing more to do.
+                return;
+
+            auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
+            table_expression_data.addAliasColumnName(column_node.getColumnName(), column_identifier); /// Record the alias name together with its identifier.
+
+            visitImpl(column_node.getExpressionOrThrow()); /// Continue into the alias expression so the nodes it references are visited as well.
+        }
+        else
+            visitImpl(node); /// No expression: fall back to the regular column handling.
+    }
+
     PlannerContext & planner_context;
 };
 
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index e2cdf146a69..548c151757e 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -978,6 +978,55 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
     plan_to_add_cast.addStep(std::move(cast_join_columns_step));
 }
 
+struct UsingAliasKeyActions
+{
+    UsingAliasKeyActions(
+        const ColumnsWithTypeAndName & left_plan_output_columns,
+        const ColumnsWithTypeAndName & right_plan_output_columns
+    )
+        : left_alias_columns_keys(std::make_shared<ActionsDAG>(left_plan_output_columns)) /// One ActionsDAG per join side, seeded with that side's plan output columns.
+        , right_alias_columns_keys(std::make_shared<ActionsDAG>(right_plan_output_columns))
+    {}
+
+    void addLeftColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) /// Adds key actions for a left-side USING column; does nothing unless the column has an expression.
+    {
+        addColumnImpl(left_alias_columns_keys, node, plan_output_columns, planner_context);
+    }
+
+    void addRightColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) /// Same as addLeftColumn, but accumulates into the right-side DAG.
+    {
+        addColumnImpl(right_alias_columns_keys, node, plan_output_columns, planner_context);
+    }
+
+    ActionsDAGPtr getLeftActions() /// Calls projectInput() and hands the left DAG over; the member is moved-from afterwards.
+    {
+        left_alias_columns_keys->projectInput();
+        return std::move(left_alias_columns_keys);
+    }
+
+    ActionsDAGPtr getRightActions() /// Calls projectInput() and hands the right DAG over; the member is moved-from afterwards.
+    {
+        right_alias_columns_keys->projectInput();
+        return std::move(right_alias_columns_keys);
+    }
+
+private:
+    void addColumnImpl(ActionsDAGPtr & alias_columns_keys, QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) /// Shared implementation behind addLeftColumn/addRightColumn.
+    {
+        auto & column_node = node->as<ColumnNode&>();
+        if (column_node.hasExpression()) /// Columns without an expression need no extra actions.
+        {
+            auto dag = buildActionsDAGFromExpressionNode(column_node.getExpressionOrThrow(), plan_output_columns, planner_context); /// DAG for the column's expression, built against the given plan output columns.
+            const auto & left_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(node); /// NOTE(review): named "left_..." but this helper is also called for the right side (see addRightColumn).
+            dag->addOrReplaceInOutputs(dag->addAlias(*dag->getOutputs().front(), left_inner_column_identifier)); /// Alias the first output to the column's identifier and put it in the outputs.
+            alias_columns_keys->mergeInplace(std::move(*dag)); /// Merge into the accumulated DAG for this side.
+        }
+    }
+
+    ActionsDAGPtr left_alias_columns_keys;
+    ActionsDAGPtr right_alias_columns_keys;
+};
+
 JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression,
     JoinTreeQueryPlan left_join_tree_query_plan,
     JoinTreeQueryPlan right_join_tree_query_plan,
@@ -1034,6 +1083,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
 
     if (join_node.isUsingJoinExpression())
     {
+        UsingAliasKeyActions using_alias_key_actions{left_plan_output_columns, right_plan_output_columns};
+
         auto & join_node_using_columns_list = join_node.getJoinExpression()->as<ListNode &>();
         for (auto & join_node_using_node : join_node_using_columns_list.getNodes())
         {
@@ -1043,9 +1094,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             auto & left_inner_column_node = inner_columns_list.getNodes().at(0);
             auto & left_inner_column = left_inner_column_node->as<ColumnNode &>();
 
+            using_alias_key_actions.addLeftColumn(left_inner_column_node, left_plan_output_columns, planner_context);
+
             auto & right_inner_column_node = inner_columns_list.getNodes().at(1);
             auto & right_inner_column = right_inner_column_node->as<ColumnNode &>();
 
+            using_alias_key_actions.addRightColumn(right_inner_column_node, right_plan_output_columns, planner_context);
+
             const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType();
             if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type))
             {
@@ -1059,6 +1114,14 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
                 right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type);
             }
         }
+
+        auto left_alias_columns_keys_step = std::make_unique<ExpressionStep>(left_plan.getCurrentDataStream(), using_alias_key_actions.getLeftActions());
+        left_alias_columns_keys_step->setStepDescription("Actions for left table alias column keys");
+        left_plan.addStep(std::move(left_alias_columns_keys_step));
+
+        auto right_alias_columns_keys_step = std::make_unique<ExpressionStep>(right_plan.getCurrentDataStream(), using_alias_key_actions.getRightActions());
+        right_alias_columns_keys_step->setStepDescription("Actions for right table alias column keys");
+        right_plan.addStep(std::move(right_alias_columns_keys_step));
     }
 
     auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map<std::string, DataTypePtr> & plan_column_name_to_cast_type)
diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h
index 9f963dc182a..f6ef4017c98 100644
--- a/src/Planner/TableExpressionData.h
+++ b/src/Planner/TableExpressionData.h
@@ -80,9 +80,11 @@ public:
     }
 
     /// Add alias column name
-    void addAliasColumnName(const std::string & column_name)
+    void addAliasColumnName(const std::string & column_name, const ColumnIdentifier & column_identifier)
     {
         alias_columns_names.insert(column_name);
+
+        column_name_to_column_identifier.emplace(column_name, column_identifier);
     }
 
     /// Get alias columns names
diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
index d00491fd7e5..6ed281c757a 100644
--- a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
@@ -1 +1,2 @@
 1
+1
diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
index e4c1fd86b09..7983b43d7e5 100644
--- a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
@@ -6,6 +6,7 @@ INSERT INTO t2 VALUES (6666, 48);
 INSERT INTO t2 VALUES (369, 50);
 
 SELECT count() FROM t1 INNER JOIN t2 USING (y);
+SELECT count() FROM t2 INNER JOIN t1 USING (y);
 
 DROP TABLE IF EXISTS t1;
 DROP TABLE IF EXISTS t2;

From 76f58fb49ebb80b5143435d0de9635280f72c73a Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 11:17:24 +0000
Subject: [PATCH 116/204] Add a comment

---
 src/Planner/PlannerJoinTree.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 548c151757e..774e01839fc 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -978,6 +978,8 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
     plan_to_add_cast.addStep(std::move(cast_join_columns_step));
 }
 
+/// Actions to calculate table columns that have a functional representation (ALIASes and subcolumns)
+/// and used in USING clause of JOIN expression.
 struct UsingAliasKeyActions
 {
     UsingAliasKeyActions(

From 9a49f0cbb3a278e61d8390c8968aef920b309975 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 3 Jan 2024 11:26:01 +0000
Subject: [PATCH 117/204] Factorize common code

---
 src/Functions/punycode.cpp | 78 ++++++++++++--------------------------
 1 file changed, 24 insertions(+), 54 deletions(-)

diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp
index ad66c723f05..869b79daa10 100644
--- a/src/Functions/punycode.cpp
+++ b/src/Functions/punycode.cpp
@@ -38,13 +38,14 @@ enum class ErrorHandling
     Null
 };
 
-template <ErrorHandling error_handling>
-class FunctionPunycodeEncode : public IFunction
+
+template <typename Impl>
+class FunctionPunycode : public IFunction
 {
 public:
-    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "punycodeEncodeOrNull" : "punycodeEncode";
+    static constexpr auto name = Impl::name;
 
-    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionPunycodeEncode>(); }
+    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionPunycode<Impl>>(); }
     String getName() const override { return name; }
     size_t getNumberOfArguments() const override { return 1; }
     bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
@@ -59,7 +60,7 @@ public:
 
         auto return_type = std::make_shared<DataTypeString>();
 
-        if constexpr (error_handling == ErrorHandling::Null)
+        if constexpr (Impl::error_handling == ErrorHandling::Null)
             return makeNullable(return_type);
         else
             return return_type;
@@ -70,18 +71,25 @@ public:
         auto col_res = ColumnString::create();
         ColumnUInt8::MutablePtr col_res_null;
         if (const ColumnString * col = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
-            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_res_null);
+            Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_res_null);
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                 arguments[0].column->getName(), getName());
 
-        if constexpr (error_handling == ErrorHandling::Null)
+        if constexpr (Impl::error_handling == ErrorHandling::Null)
             return ColumnNullable::create(std::move(col_res), std::move(col_res_null));
         else
             return col_res;
     }
+};
+
+
+template <ErrorHandling error_handling_>
+struct PunycodeEncodeImpl
+{
+    static constexpr auto error_handling = error_handling_;
+    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "punycodeEncodeOrNull" : "punycodeEncode";
 
-private:
     static void vector(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
@@ -132,51 +140,13 @@ private:
     }
 };
 
-template <ErrorHandling error_handling>
-class FunctionPunycodeDecode : public IFunction
+
+template <ErrorHandling error_handling_>
+struct PunycodeDecodeImpl
 {
-public:
+    static constexpr auto error_handling = error_handling_;
     static constexpr auto name = (error_handling == ErrorHandling::Null) ? "punycodeDecodeOrNull" : "punycodeDecode";
 
-    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionPunycodeDecode>(); }
-    String getName() const override { return name; }
-    size_t getNumberOfArguments() const override { return 1; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-    bool useDefaultImplementationForConstants() const override { return true; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        FunctionArgumentDescriptors args{
-            {"str", &isString<IDataType>, nullptr, "String"},
-        };
-        validateFunctionArgumentTypes(*this, arguments, args);
-
-        auto return_type = std::make_shared<DataTypeString>();
-
-        if constexpr (error_handling == ErrorHandling::Null)
-            return makeNullable(return_type);
-        else
-            return return_type;
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
-    {
-        auto col_res = ColumnString::create();
-        ColumnUInt8::MutablePtr col_res_null;
-
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
-            vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_res_null);
-        else
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
-                arguments[0].column->getName(), getName());
-
-        if constexpr (error_handling == ErrorHandling::Null)
-            return ColumnNullable::create(std::move(col_res), std::move(col_res_null));
-        else
-            return col_res;
-    }
-
-private:
     static void vector(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
@@ -232,7 +202,7 @@ private:
 
 REGISTER_FUNCTION(Punycode)
 {
-    factory.registerFunction<FunctionPunycodeEncode<ErrorHandling::Throw>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycode<PunycodeEncodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Throws an exception in case of error.)",
         .syntax="punycodeEncode(str)",
@@ -249,7 +219,7 @@ Computes a Punycode representation of a string. Throws an exception in case of e
             }}
     });
 
-    factory.registerFunction<FunctionPunycodeEncode<ErrorHandling::Null>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycode<PunycodeEncodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Returns NULL in case of error)",
         .syntax="punycodeEncode(str)",
@@ -266,7 +236,7 @@ Computes a Punycode representation of a string. Returns NULL in case of error)",
             }}
     });
 
-    factory.registerFunction<FunctionPunycodeDecode<ErrorHandling::Throw>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycode<PunycodeDecodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Throws an exception in case of error.)",
         .syntax="punycodeDecode(str)",
@@ -283,7 +253,7 @@ Computes a Punycode representation of a string. Throws an exception in case of e
             }}
     });
 
-    factory.registerFunction<FunctionPunycodeDecode<ErrorHandling::Null>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycode<PunycodeDecodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Returns NULL in case of error)",
         .syntax="punycodeDecode(str)",

From 89beb32e646e54012171d4388d874ba8b80fc839 Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Wed, 3 Jan 2024 13:10:14 +0100
Subject: [PATCH 118/204] Edit docs for toWeek()

---
 docs/en/sql-reference/functions/date-time-functions.md | 4 +++-
 docs/ru/sql-reference/functions/date-time-functions.md | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 0261589b968..5622097537e 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1483,7 +1483,9 @@ For mode values with a meaning of “with 4 or more days this year,” weeks are
 
 - Otherwise, it is the last week of the previous year, and the next week is week 1.
 
-For mode values with a meaning of “contains January 1”, the week contains January 1 is week 1. It does not matter how many days in the new year the week contained, even if it contained only one day.
+For mode values with a meaning of “contains January 1”, the week that contains January 1 is week 1.
+It does not matter how many days in the new year the week contained, even if it contained only one day.
+I.e. if the last week of December contains January 1 of the next year, it will be week 1 of the next year.
 
 **Syntax**
 
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md
index fa5728a097d..cbbb456aa80 100644
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@@ -578,7 +578,9 @@ SELECT
 
 - В противном случае это последняя неделя предыдущего года, а следующая неделя - неделя 1.
 
-Для режимов со значением «содержит 1 января», неделя 1 – это неделя содержащая 1 января. Не имеет значения, сколько дней в новом году содержала неделя, даже если она содержала только один день.
+Для режимов со значением «содержит 1 января», неделя 1 – это неделя, содержащая 1 января. 
+Не имеет значения, сколько дней нового года содержит эта неделя, даже если она содержит только один день. 
+Так, если последняя неделя декабря содержит 1 января следующего года, то она считается неделей 1 следующего года.
 
 **Пример**
 

From be825b129053f1c47762e08142ffebf5761c1df8 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 3 Jan 2024 12:20:08 +0000
Subject: [PATCH 119/204] fix segfault when graphite table does not have agg
 function

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Processors/Merges/Algorithms/Graphite.h   |  7 +-
 .../config/config.d/graphite_alternative.xml  | 24 +++++++
 ...cated_merge_parameters_must_consistent.sql | 70 ++++++++++++++++---
 3 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h
index 692e36d2eae..04bb4548c14 100644
--- a/src/Processors/Merges/Algorithms/Graphite.h
+++ b/src/Processors/Merges/Algorithms/Graphite.h
@@ -127,7 +127,12 @@ struct Pattern
     {
         hash.update(rule_type);
         hash.update(regexp_str);
-        hash.update(function->getName());
+        if (function)
+        {
+            hash.update(function->getName());
+            for (const auto & p : function->getParameters())
+                hash.update(toString(p));
+        }
         for (const auto & r : retentions)
         {
             hash.update(r.age);
diff --git a/tests/config/config.d/graphite_alternative.xml b/tests/config/config.d/graphite_alternative.xml
index 1a00de52af5..6c0bd13ce43 100644
--- a/tests/config/config.d/graphite_alternative.xml
+++ b/tests/config/config.d/graphite_alternative.xml
@@ -26,4 +26,28 @@
             </retention>
         </default>
     </graphite_rollup_alternative>
+    <graphite_rollup_alternative_no_function> <!-- Rollup rules with retentions only and no <function> element; referenced by test 02910 (table t4_r_error_2). -->
+        <version_column_name>Version</version_column_name>
+        <pattern>
+            <regexp>sum</regexp>
+            <retention>
+                <age>0</age>
+                <precision>600</precision>
+            </retention>
+            <retention>
+                <age>17280</age>
+                <precision>6000</precision>
+            </retention>
+        </pattern>
+        <default>
+            <retention>
+                <age>0</age>
+                <precision>600</precision>
+            </retention>
+            <retention>
+                <age>17280</age>
+                <precision>6000</precision>
+            </retention>
+        </default>
+    </graphite_rollup_alternative_no_function>
 </clickhouse>
diff --git a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql
index 3c1bec4fb3f..0f452105e6d 100644
--- a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql
+++ b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql
@@ -8,13 +8,22 @@ CREATE TABLE t
 ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r1', legacy_ver)
 ORDER BY id;
 
-CREATE TABLE t_r
+CREATE TABLE t_r_ok
+(
+    `id` UInt64,
+    `val` String,
+    `legacy_ver` UInt64,
+)
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r2', legacy_ver)
+ORDER BY id;
+
+CREATE TABLE t_r_error
 (
     `id` UInt64,
     `val` String,
     `legacy_ver` UInt64
 )
-ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r2')
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r3')
 ORDER BY id; -- { serverError METADATA_MISMATCH }
 
 CREATE TABLE t2
@@ -27,14 +36,24 @@ CREATE TABLE t2
 ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r1', legacy_ver)
 ORDER BY id;
 
-CREATE TABLE t2_r
+CREATE TABLE t2_r_ok
 (
     `id` UInt64,
     `val` String,
     `legacy_ver` UInt64,
     `deleted` UInt8
 )
-ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r2', legacy_ver, deleted)
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r2', legacy_ver)
+ORDER BY id;
+
+CREATE TABLE t2_r_error
+(
+    `id` UInt64,
+    `val` String,
+    `legacy_ver` UInt64,
+    `deleted` UInt8
+)
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r3', legacy_ver, deleted)
 ORDER BY id; -- { serverError METADATA_MISMATCH }
 
 CREATE TABLE t3
@@ -46,13 +65,23 @@ CREATE TABLE t3
 ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r1', metrics1)
 ORDER BY key;
 
-CREATE TABLE t3_r
+CREATE TABLE t3_r_ok
 (
     `key` UInt64,
     `metrics1` UInt64,
     `metrics2` UInt64
 )
-ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r2', metrics2)
+ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r2', metrics1)
+ORDER BY key;
+
+
+CREATE TABLE t3_r_error
+(
+    `key` UInt64,
+    `metrics1` UInt64,
+    `metrics2` UInt64
+)
+ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r3', metrics2)
 ORDER BY key; -- { serverError METADATA_MISMATCH }
 
 CREATE TABLE t4
@@ -67,7 +96,7 @@ CREATE TABLE t4
 ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r1', 'graphite_rollup')
 ORDER BY key;
 
-CREATE TABLE t4_r
+CREATE TABLE t4_r_ok
 (
     `key` UInt32,
     `Path` String,
@@ -76,5 +105,30 @@ CREATE TABLE t4_r
     `Version` UInt32,
     `col` UInt64
 )
-ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r2', 'graphite_rollup_alternative')
+ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r2', 'graphite_rollup')
+ORDER BY key;
+
+CREATE TABLE t4_r_error
+(
+    `key` UInt32,
+    `Path` String,
+    `Time` DateTime('UTC'),
+    `Value` Float64,
+    `Version` UInt32,
+    `col` UInt64
+)
+ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r3', 'graphite_rollup_alternative')
 ORDER BY key; -- { serverError METADATA_MISMATCH }
+
+-- https://github.com/ClickHouse/ClickHouse/issues/58451
+CREATE TABLE t4_r_error_2
+(
+    `key` UInt32,
+    `Path` String,
+    `Time` DateTime('UTC'),
+    `Value` Float64,
+    `Version` UInt32,
+    `col` UInt64
+)
+ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r4', 'graphite_rollup_alternative_no_function')
+ORDER BY key; -- { serverError METADATA_MISMATCH }
\ No newline at end of file

From 74c3b0dff142fc19a84f5f0ad9105d84bb82912f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 3 Jan 2024 15:55:27 +0300
Subject: [PATCH 120/204] Update src/DataTypes/DataTypeTuple.cpp

Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
---
 src/DataTypes/DataTypeTuple.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp
index a26f0df630e..db8a14c537a 100644
--- a/src/DataTypes/DataTypeTuple.cpp
+++ b/src/DataTypes/DataTypeTuple.cpp
@@ -118,7 +118,7 @@ std::string DataTypeTuple::doGetPrettyName(size_t indent) const
     }
     else
     {
-        s << "Tuple(\n";
+        s << "Tuple(";
 
         for (size_t i = 0; i != size; ++i)
         {

From 1f960a32de2f63012fcba3f4cb1b28ebf596d64f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 14:04:30 +0100
Subject: [PATCH 121/204] Fix OSX build

---
 src/Common/iota.cpp                              |  3 +++
 src/Common/iota.h                                | 16 ++++++++++++++--
 src/Common/tests/gtest_hash_table.cpp            |  2 +-
 .../QueryPlan/ReadFromSystemNumbersStep.cpp      |  2 +-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp
index 385d3b22207..98f18eb195b 100644
--- a/src/Common/iota.cpp
+++ b/src/Common/iota.cpp
@@ -30,4 +30,7 @@ void iota(T * begin, size_t count, T first_value)
 template void iota(UInt8 * begin, size_t count, UInt8 first_value);
 template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 template void iota(UInt64 * begin, size_t count, UInt64 first_value);
+#if defined(OS_DARWIN)
+template void iota(size_t * begin, size_t count, size_t first_value);
+#endif
 }
diff --git a/src/Common/iota.h b/src/Common/iota.h
index 485df4bd4f0..7910274d15d 100644
--- a/src/Common/iota.h
+++ b/src/Common/iota.h
@@ -10,13 +10,25 @@ namespace DB
 {
 
 /// Make sure to add any new type to the extern declaration at the end of the file and instantiate it in iota.cpp
+
 template <typename T>
-concept iota_supported_types = (is_any_of<T, UInt8, UInt32, UInt64>);
+concept iota_supported_types = (is_any_of<
+                                T,
+                                UInt8,
+                                UInt32,
+                                UInt64
+#if defined(OS_DARWIN)
+                                ,
+                                size_t
+#endif
+                                >);
 
 template <iota_supported_types T> void iota(T * begin, size_t count, T first_value);
 
 extern template void iota(UInt8 * begin, size_t count, UInt8 first_value);
 extern template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 extern template void iota(UInt64 * begin, size_t count, UInt64 first_value);
-
+#if defined(OS_DARWIN)
+extern template void iota(size_t * begin, size_t count, size_t first_value);
+#endif
 }
diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp
index ab7c3872170..ae432de7766 100644
--- a/src/Common/tests/gtest_hash_table.cpp
+++ b/src/Common/tests/gtest_hash_table.cpp
@@ -21,7 +21,7 @@ namespace
 std::vector<UInt64> getVectorWithNumbersUpToN(size_t n)
 {
     std::vector<UInt64> res(n);
-    iota(res.data(), res.size(), size_t(0));
+    iota(res.data(), res.size(), UInt64(0));
     return res;
 }
 
diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index 329497d66d3..5ccde0ba5bc 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -41,7 +41,7 @@ protected:
         auto column = ColumnUInt64::create(block_size);
         ColumnUInt64::Container & vec = column->getData();
 
-        size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
+        UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class.
         UInt64 * pos = vec.data(); /// This also accelerates the code.
         UInt64 * end = &vec[block_size];
         iota(pos, static_cast<size_t>(end - pos), curr);

From c8acc7c2d1d51c39c3a20dbcebc2eb03d49f0994 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Wed, 3 Jan 2024 14:44:00 +0100
Subject: [PATCH 122/204] Fix build

---
 src/Storages/StorageMaterializedView.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index 59f1d5eee1b..934d57d40e2 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -72,7 +72,7 @@ public:
 
     StoragePtr getTargetTable() const;
     StoragePtr tryGetTargetTable() const;
-    StorageID getTargetTableId() const { return target_table_id; }
+    StorageID getTargetTableId() const;
 
     /// Get the virtual column of the target table;
     NamesAndTypesList getVirtuals() const override;

From 9dc1f4d99c139a2913d075f3cf10adec37ff7a48 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 14:53:40 +0100
Subject: [PATCH 123/204] Update StorageMerge.cpp

---
 src/Storages/StorageMerge.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 58124973cbf..5d4f50baa53 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -51,7 +51,6 @@
 #include <Common/assert_cast.h>
 #include <Common/checkStackSize.h>
 #include <Common/typeid_cast.h>
-#include <Parsers/queryToString.h>
 
 namespace
 {

From 1c3364046e50e0c512eb84c58e3ee7e50998469c Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 13:54:17 +0000
Subject: [PATCH 124/204] Fixup

---
 src/Planner/CollectTableExpressionData.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 492120141a4..4c48202af0b 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -77,12 +77,12 @@ public:
         {
             /// Replace ALIAS column with expression
             bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName());
-            if (column_already_exists)
-                return;
+            if (!column_already_exists)
+            {
+                auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
+                table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier);
+            }
 
-            auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
-
-            table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier);
             node = column_node->getExpression();
             visitImpl(node);
             return;

From 5308e24b8cc9ad3339117e183318e4f372bf43ce Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 14:12:56 +0000
Subject: [PATCH 125/204] Another fixup + reference update

---
 src/Planner/CollectTableExpressionData.cpp       |  3 ++-
 .../02514_analyzer_drop_join_on.reference        | 16 ++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 4c48202af0b..38c986fd31f 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -109,7 +109,8 @@ public:
     {
         if (auto * join_node = parent->as<JoinNode>())
         {
-            return join_node->getJoinExpression() != child_node || !join_node->isUsingJoinExpression();
+            if (join_node->getJoinExpression() == child_node && join_node->isUsingJoinExpression())
+                return false;
         }
         auto child_node_type = child_node->getNodeType();
         return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
index 51e009dcd91..7e94fdf1a42 100644
--- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
+++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
@@ -55,35 +55,35 @@ Header: a2 String
   Header: a2_0 String
           k_2 UInt64
           d2_1 String
-    Expression (DROP unused columns after JOIN)
+    Expression ((Actions for left table alias column keys + DROP unused columns after JOIN))
     Header: a2_0 String
             k_2 UInt64
       Join (JOIN FillRightFirst)
       Header: a2_0 String
               k_2 UInt64
-        Expression (DROP unused columns after JOIN)
+        Expression ((Actions for left table alias column keys + DROP unused columns after JOIN))
         Header: a2_0 String
                 k_2 UInt64
           Join (JOIN FillRightFirst)
           Header: a2_0 String
                   k_2 UInt64
-            Expression (Change column names to column identifiers)
+            Expression ((Actions for left table alias column keys + Change column names to column identifiers))
             Header: a2_0 String
                     k_2 UInt64
               ReadFromMemoryStorage
               Header: a2 String
                       k UInt64
-            Expression (Change column names to column identifiers)
-            Header: k_3 UInt64
+            Expression ((Actions for right table alias column keys + Change column names to column identifiers))
+            Header: k_5 UInt64
               ReadFromMemoryStorage
               Header: k UInt64
-        Expression (Change column names to column identifiers)
+        Expression ((Actions for right table alias column keys + Change column names to column identifiers))
         Header: k_4 UInt64
           ReadFromMemoryStorage
           Header: k UInt64
-    Expression (Change column names to column identifiers)
+    Expression ((Actions for right table alias column keys + Change column names to column identifiers))
     Header: d2_1 String
-            k_5 UInt64
+            k_3 UInt64
       ReadFromMemoryStorage
       Header: d2 String
               k UInt64

From 31254826314fffb56f02a486297f8ba54a55173d Mon Sep 17 00:00:00 2001
From: Mark Needham <m.h.needham@gmail.com>
Date: Wed, 3 Jan 2024 14:25:03 +0000
Subject: [PATCH 126/204] Add output_format_decimal_trailing_zeros setting for
 trailing zeros

---
 .../functions/rounding-functions.md           | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 84839c2489c..3ede66cf316 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -53,7 +53,7 @@ The rounded number of the same type as the input number.
 **Example of use with Float**
 
 ``` sql
-SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3
+SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3;
 ```
 
 ``` text
@@ -67,7 +67,22 @@ SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3
 **Example of use with Decimal**
 
 ``` sql
-SELECT cast(number / 2 AS  Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3
+SELECT cast(number / 2 AS  Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3;
+```
+
+``` text
+┌───x─┬─round(CAST(divide(number, 2), 'Decimal(10, 4)'))─┐
+│   0 │                                                0 │
+│ 0.5 │                                                1 │
+│   1 │                                                1 │
+└─────┴──────────────────────────────────────────────────┘
+```
+
+If you want to keep the trailing zeros, you need to enable the `output_format_decimal_trailing_zeros` setting:
+
+``` sql
+SELECT cast(number / 2 AS  Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3 settings output_format_decimal_trailing_zeros=1;
+
 ```
 
 ``` text

From 7ee1697971e310d29aa00b4627f415b74b47b748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:16:45 +0000
Subject: [PATCH 127/204] Reduce setup time of min_max_index.xml

---
 tests/performance/min_max_index.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/min_max_index.xml b/tests/performance/min_max_index.xml
index b7b5d4fb991..518696144e2 100644
--- a/tests/performance/min_max_index.xml
+++ b/tests/performance/min_max_index.xml
@@ -1,7 +1,7 @@
 <test>
     <create_query>CREATE TABLE index_test (z UInt32, INDEX i_x (mortonDecode(2, z).1) TYPE minmax, INDEX i_y (mortonDecode(2, z).2) TYPE minmax) ENGINE = MergeTree ORDER BY z</create_query>
 
-    <fill_query>INSERT INTO index_test SELECT number FROM numbers(0x100000000) WHERE rand() % 3 = 1</fill_query>
+    <fill_query>INSERT INTO index_test SELECT number * 10 FROM numbers_mt(toUInt64(0x100000000 / 10)) SETTINGS max_insert_threads=8</fill_query>
 
     <query><![CDATA[
     SELECT count() FROM index_test WHERE mortonDecode(2, z).1 >= 20000 AND mortonDecode(2, z).1 <= 20100 AND mortonDecode(2, z).2 >= 10000 AND mortonDecode(2, z).2 <= 10100

From b8305e1a6e976cb040454089e57e6db97310d0e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:19:44 +0000
Subject: [PATCH 128/204] Make test more reasonable

---
 tests/performance/group_by_sundy_li.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/performance/group_by_sundy_li.xml b/tests/performance/group_by_sundy_li.xml
index 694fafcbbcd..46f659d9cc0 100644
--- a/tests/performance/group_by_sundy_li.xml
+++ b/tests/performance/group_by_sundy_li.xml
@@ -16,10 +16,10 @@
         ORDER BY (d, n)
     </create_query>
 
-    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
+    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
+    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
+    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
 
     <fill_query>OPTIMIZE TABLE a FINAL</fill_query>
 

From 910b3385841297e442f6d349244db0052cc1c3e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:24:15 +0000
Subject: [PATCH 129/204] Reduce polymorphic_parts_m

---
 tests/performance/polymorphic_parts_l.xml | 4 ++--
 tests/performance/polymorphic_parts_m.xml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml
index d2ae9417bf7..66c5b73caa8 100644
--- a/tests/performance/polymorphic_parts_l.xml
+++ b/tests/performance/polymorphic_parts_l.xml
@@ -25,8 +25,8 @@
     </settings>
 
     <!-- 100 parts -->
-    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM  numbers(100000)</query>
-    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM  numbers(100000)</query>
+    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM numbers(100000)</query>
+    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000)</query>
     <query>INSERT INTO hits_buffer(UserID)  SELECT rand() FROM numbers(100000)</query>
 
     <drop_query>DROP TABLE IF EXISTS hits_wide</drop_query>
diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml
index 54a81def55e..0a44038ffbd 100644
--- a/tests/performance/polymorphic_parts_m.xml
+++ b/tests/performance/polymorphic_parts_m.xml
@@ -25,8 +25,8 @@
     </settings>
 
     <!-- 100 parts -->
-    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM  numbers(10000)</query>
-    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000)</query>
+    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM numbers(10000)</query>
+    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000)</query>
     <query>INSERT INTO hits_buffer(UserID)  SELECT rand() FROM numbers(10000)</query>
 
     <drop_query>DROP TABLE IF EXISTS hits_wide</drop_query>

From c223ae56d33723e52d331a19eb05d70b209792a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:29:30 +0000
Subject: [PATCH 130/204] Reduce the size of decimal_parse

---
 tests/performance/decimal_parse.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/decimal_parse.xml b/tests/performance/decimal_parse.xml
index 19e940b13df..966363d6fec 100644
--- a/tests/performance/decimal_parse.xml
+++ b/tests/performance/decimal_parse.xml
@@ -1,3 +1,3 @@
 <test>
-    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(toDecimal32OrZero(toString(rand() % 10000), 5))</query>
+    <query>SELECT count() FROM zeros(3000000) WHERE NOT ignore(toDecimal32OrZero(toString(rand() % 10000), 5))</query>
 </test>

From c1953206123ba0d8337212596ca64cf220365bc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:31:55 +0000
Subject: [PATCH 131/204] Reduce the size of join_used_flags.xml

---
 tests/performance/join_used_flags.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/join_used_flags.xml b/tests/performance/join_used_flags.xml
index cd2073ee106..70b0b45391d 100644
--- a/tests/performance/join_used_flags.xml
+++ b/tests/performance/join_used_flags.xml
@@ -1,6 +1,6 @@
 <test>
     <create_query>CREATE TABLE test_join_used_flags (i64 Int64, i32 Int32) ENGINE = Memory</create_query>
-    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers(20000000)</fill_query>
+    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers_mt(3000000)</fill_query>
     <query>SELECT l.i64, r.i64, l.i32, r.i32 FROM test_join_used_flags l RIGHT JOIN test_join_used_flags r USING i64 format Null</query>
     <drop_query>DROP TABLE IF EXISTS test_join_used_flags</drop_query>
 </test>

From d06de83ac14dd8aab015868c84ac341799be7294 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 17:44:28 +0000
Subject: [PATCH 132/204] Fix KeyCondition for file/url/s3

---
 src/Processors/SourceWithKeyCondition.h       | 34 ++++++++-----------
 src/Storages/StorageFile.cpp                  |  7 ++--
 src/Storages/StorageS3.cpp                    |  7 ++--
 src/Storages/StorageURL.cpp                   |  7 ++--
 src/Storages/VirtualColumnUtils.cpp           | 34 +++++++++++++++++++
 .../02725_parquet_preserve_order.reference    |  4 +--
 6 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h
index 9e641cc8c51..c9617d3e73e 100644
--- a/src/Processors/SourceWithKeyCondition.h
+++ b/src/Processors/SourceWithKeyCondition.h
@@ -18,31 +18,25 @@ protected:
 
     void setKeyConditionImpl(const SelectQueryInfo & query_info, ContextPtr context, const Block & keys)
     {
-        if (!context->getSettingsRef().allow_experimental_analyzer)
-        {
-            key_condition = std::make_shared<const KeyCondition>(
-                query_info,
-                context,
-                keys.getNames(),
-                std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
-        }
+        key_condition = std::make_shared<const KeyCondition>(
+            query_info,
+            context,
+            keys.getNames(),
+            std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
     }
 
     void setKeyConditionImpl(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context, const Block & keys)
     {
-        if (context->getSettingsRef().allow_experimental_analyzer)
-        {
-            std::unordered_map<std::string, DB::ColumnWithTypeAndName> node_name_to_input_column;
-            for (const auto & column : keys.getColumnsWithTypeAndName())
-                node_name_to_input_column.insert({column.name, column});
+        std::unordered_map<std::string, DB::ColumnWithTypeAndName> node_name_to_input_column;
+        for (const auto & column : keys.getColumnsWithTypeAndName())
+            node_name_to_input_column.insert({column.name, column});
 
-            auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context);
-            key_condition = std::make_shared<const KeyCondition>(
-                filter_actions_dag,
-                context,
-                keys.getNames(),
-                std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
-        }
+        auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context);
+        key_condition = std::make_shared<const KeyCondition>(
+            filter_actions_dag,
+            context,
+            keys.getNames(),
+            std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
     }
 
 public:
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 60e06291200..f3917b878d6 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1469,14 +1469,17 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
         if (storage->has_peekable_read_buffer_from_fd.exchange(false))
             read_buffer = std::move(storage->peekable_read_buffer_from_fd);
 
-        pipes.emplace_back(std::make_shared<StorageFileSource>(
+        auto source = std::make_shared<StorageFileSource>(
             info,
             storage,
             context,
             max_block_size,
             files_iterator,
             std::move(read_buffer),
-            need_only_count));
+            need_only_count);
+
+        source->setKeyCondition(filter_nodes.nodes, context);
+        pipes.emplace_back(std::move(source));
     }
 
     auto pipe = Pipe::unitePipes(std::move(pipes));
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index b6d96e21e33..ce49be32120 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1255,7 +1255,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
     pipes.reserve(num_streams);
     for (size_t i = 0; i < num_streams; ++i)
     {
-        pipes.emplace_back(std::make_shared<StorageS3Source>(
+        auto source = std::make_shared<StorageS3Source>(
             read_from_format_info,
             query_configuration.format,
             storage.getName(),
@@ -1270,7 +1270,10 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
             query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()),
             iterator_wrapper,
             max_parsing_threads,
-            need_only_count));
+            need_only_count);
+
+        source->setKeyCondition(filter_nodes.nodes, local_context);
+        pipes.emplace_back(std::move(source));
     }
 
     auto pipe = Pipe::unitePipes(std::move(pipes));
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 9ace7775d4b..c0e4be36202 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -1064,7 +1064,7 @@ void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil
 
     for (size_t i = 0; i < num_streams; ++i)
     {
-        pipes.emplace_back(std::make_shared<StorageURLSource>(
+        auto source = std::make_shared<StorageURLSource>(
             info,
             iterator_wrapper,
             storage->getReadMethod(),
@@ -1080,7 +1080,10 @@ void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil
             storage->headers,
             read_uri_params,
             is_url_with_globs,
-            need_only_count));
+            need_only_count);
+
+        source->setKeyCondition(filter_nodes.nodes, context);
+        pipes.emplace_back(std::move(source));
     }
 
     if (uri_options)
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index 20e9a5ea174..76138bbea87 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -36,7 +36,10 @@
 #include <Storages/VirtualColumnUtils.h>
 #include <IO/WriteHelpers.h>
 #include <Common/typeid_cast.h>
+#include "Functions/FunctionsLogical.h"
 #include "Functions/IFunction.h"
+#include "Functions/IFunctionAdaptors.h"
+#include "Functions/indexHint.h"
 #include <Parsers/makeASTForLogicalFunction.h>
 #include <Columns/ColumnSet.h>
 #include <Functions/FunctionHelpers.h>
@@ -519,6 +522,37 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
 
             return &node_copy;
         }
+        else if (node->function_base->getName() == "indexHint")
+        {
+            if (const auto * adaptor = typeid_cast<const FunctionToFunctionBaseAdaptor *>(node->function_base.get()))
+            {
+                if (const auto * index_hint = typeid_cast<const FunctionIndexHint *>(adaptor->getFunction().get()))
+                {
+                    auto index_hint_dag = index_hint->getActions()->clone();
+                    ActionsDAG::NodeRawConstPtrs atoms;
+                    for (const auto & output : index_hint_dag->getOutputs())
+                        if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
+                            atoms.push_back(child_copy);
+
+                    if (!atoms.empty())
+                    {
+                        const auto * res = atoms.at(0);
+
+                        if (atoms.size() > 1)
+                        {
+                            FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
+                            res = &index_hint_dag->addFunction(func_builder_and, atoms, {});
+                        }
+
+                        if (!res->result_type->equals(*node->result_type))
+                            res = &index_hint_dag->addCast(*res, node->result_type, {});
+
+                        additional_nodes.splice(additional_nodes.end(), ActionsDAG::detachNodes(std::move(*index_hint_dag)));
+                        return res;
+                    }
+                }
+            }
+        }
     }
 
     if (!canEvaluateSubtree(node, allowed_inputs))
diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.reference b/tests/queries/0_stateless/02725_parquet_preserve_order.reference
index e9c8f99bb33..3f410c13ec4 100644
--- a/tests/queries/0_stateless/02725_parquet_preserve_order.reference
+++ b/tests/queries/0_stateless/02725_parquet_preserve_order.reference
@@ -3,10 +3,10 @@
 2
 (Expression)
 ExpressionTransform
-  (ReadFromStorage)
+  (ReadFromFile)
   File 0 → 1
 (Expression)
 ExpressionTransform × 2
-  (ReadFromStorage)
+  (ReadFromFile)
   Resize 1 → 2
     File 0 → 1

From 7a271f09ed134d339a3f87397cc8e2306d536242 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 17:50:46 +0000
Subject: [PATCH 133/204] Check if I can remove KeyCondition analysis on AST.

---
 src/Core/Settings.h                           |   2 +-
 src/Interpreters/InterpreterSelectQuery.cpp   |  21 ++-
 src/Interpreters/RequiredSourceColumnsData.h  |   1 -
 .../RequiredSourceColumnsVisitor.cpp          |   5 -
 src/Interpreters/TreeRewriter.cpp             |   9 +-
 src/Interpreters/TreeRewriter.h               |   2 +-
 .../optimizeUseAggregateProjection.cpp        |   4 +-
 .../QueryPlan/ReadFromMergeTree.cpp           |  71 +++-----
 .../QueryPlan/ReadFromPreparedSource.cpp      |  16 +-
 src/Processors/SourceWithKeyCondition.h       |  38 ++--
 src/Storages/Hive/StorageHive.cpp             | 166 ++++++++++++++----
 src/Storages/Hive/StorageHive.h               |  18 +-
 src/Storages/IStorage.h                       |   2 +-
 src/Storages/MergeTree/KeyCondition.h         |   3 +-
 src/Storages/MergeTree/MergeTreeData.cpp      |  31 ++--
 src/Storages/MergeTree/MergeTreeData.h        |   5 +-
 .../MergeTree/MergeTreeDataSelectExecutor.cpp |  28 ---
 .../MergeTree/MergeTreeDataSelectExecutor.h   |   6 -
 .../MergeTree/MergeTreeIndexAnnoy.cpp         |   6 +
 src/Storages/MergeTree/MergeTreeIndexAnnoy.h  |   6 +-
 .../MergeTree/MergeTreeIndexBloomFilter.cpp   |   4 +-
 .../MergeTree/MergeTreeIndexBloomFilter.h     |   2 +-
 .../MergeTreeIndexConditionBloomFilter.cpp    |  29 +--
 .../MergeTreeIndexConditionBloomFilter.h      |   3 +-
 .../MergeTree/MergeTreeIndexFullText.cpp      |  32 +---
 .../MergeTree/MergeTreeIndexFullText.h        |   7 +-
 .../MergeTree/MergeTreeIndexHypothesis.cpp    |   2 +-
 .../MergeTree/MergeTreeIndexHypothesis.h      |   2 +-
 .../MergeTree/MergeTreeIndexInverted.cpp      |  43 ++---
 .../MergeTree/MergeTreeIndexInverted.h        |   4 +-
 .../MergeTree/MergeTreeIndexMinMax.cpp        |  15 +-
 src/Storages/MergeTree/MergeTreeIndexMinMax.h |   4 +-
 src/Storages/MergeTree/MergeTreeIndexSet.cpp  |  48 ++---
 src/Storages/MergeTree/MergeTreeIndexSet.h    |   4 +-
 .../MergeTree/MergeTreeIndexUSearch.cpp       |   6 +
 .../MergeTree/MergeTreeIndexUSearch.h         |   4 +-
 src/Storages/MergeTree/MergeTreeIndices.h     |   4 +-
 src/Storages/MergeTree/PartitionPruner.cpp    |   5 +-
 src/Storages/StorageFile.cpp                  |   5 -
 src/Storages/StorageFile.h                    |   2 -
 src/Storages/StorageMergeTree.cpp             |   4 +-
 src/Storages/StorageMergeTree.h               |   2 +-
 src/Storages/StorageReplicatedMergeTree.cpp   |   4 +-
 src/Storages/StorageReplicatedMergeTree.h     |   2 +-
 src/Storages/StorageS3.h                      |   5 -
 src/Storages/StorageURL.h                     |   5 -
 46 files changed, 303 insertions(+), 384 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 988c4f357e0..6bc146d5cd1 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -707,7 +707,6 @@ class IColumn;
     M(Bool, query_plan_execute_functions_after_sorting, true, "Allow to re-order functions after sorting", 0) \
     M(Bool, query_plan_reuse_storage_ordering_for_window_functions, true, "Allow to use the storage sorting for window functions", 0) \
     M(Bool, query_plan_lift_up_union, true, "Allow to move UNIONs up so that more parts of the query plan can be optimized", 0) \
-    M(Bool, query_plan_optimize_primary_key, true, "Analyze primary key using query plan (instead of AST)", 0) \
     M(Bool, query_plan_read_in_order, true, "Use query plan for read-in-order optimization", 0) \
     M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimization", 0) \
     M(Bool, query_plan_remove_redundant_sorting, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries", 0) \
@@ -916,6 +915,7 @@ class IColumn;
     MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \
     MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \
     MAKE_OBSOLETE(M, Bool, allow_experimental_s3queue, true) \
+    MAKE_OBSOLETE(M, Bool, query_plan_optimize_primary_key, true) \
 
     /** The section above is for obsolete settings. Do not add anything there. */
 
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index cdf1b4228bc..ca16c550257 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2378,12 +2378,23 @@ std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
     else
     {
         // It's possible to optimize count() given only partition predicates
-        SelectQueryInfo temp_query_info;
-        temp_query_info.query = query_ptr;
-        temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
-        temp_query_info.prepared_sets = query_analyzer->getPreparedSets();
+        ActionsDAG::NodeRawConstPtrs filter_nodes;
+        if (analysis_result.hasPrewhere())
+        {
+            auto & prewhere_info = analysis_result.prewhere_info;
+            filter_nodes.push_back(prewhere_info->prewhere_actions->tryFindInOutputs(prewhere_info->prewhere_column_name));
 
-        return storage->totalRowsByPartitionPredicate(temp_query_info, context);
+            if (prewhere_info->row_level_filter)
+                filter_nodes.push_back(prewhere_info->row_level_filter->tryFindInOutputs(prewhere_info->row_level_column_name));
+        }
+        if (analysis_result.hasWhere())
+        {
+            filter_nodes.push_back(analysis_result.before_where->tryFindInOutputs(analysis_result.where_column_name));
+        }
+
+        auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes, {}, context);
+
+        return storage->totalRowsByPartitionPredicate(filter_actions_dag, context);
     }
 }
 
diff --git a/src/Interpreters/RequiredSourceColumnsData.h b/src/Interpreters/RequiredSourceColumnsData.h
index dd4e2dc3d68..501f6961efa 100644
--- a/src/Interpreters/RequiredSourceColumnsData.h
+++ b/src/Interpreters/RequiredSourceColumnsData.h
@@ -36,7 +36,6 @@ struct RequiredSourceColumnsData
 
     bool has_table_join = false;
     bool has_array_join = false;
-    bool visit_index_hint = false;
 
     bool addColumnAliasIfAny(const IAST & ast);
     void addColumnIdentifier(const ASTIdentifier & node);
diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp
index c07d783788a..3971c8b58f4 100644
--- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp
+++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp
@@ -72,11 +72,6 @@ void RequiredSourceColumnsMatcher::visit(const ASTPtr & ast, Data & data)
     }
     if (auto * t = ast->as<ASTFunction>())
     {
-        /// "indexHint" is a special function for index analysis.
-        /// Everything that is inside it is not calculated. See KeyCondition
-        if (!data.visit_index_hint && t->name == "indexHint")
-            return;
-
         data.addColumnAliasIfAny(*ast);
         visit(*t, ast, data);
         return;
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index 9cbf24091e3..6ed3ff2f1e6 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -995,13 +995,12 @@ void TreeRewriterResult::collectSourceColumns(bool add_special)
 /// Calculate which columns are required to execute the expression.
 /// Then, delete all other columns from the list of available columns.
 /// After execution, columns will only contain the list of columns needed to read from the table.
-bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint, bool no_throw)
+bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select, bool no_throw)
 {
     /// We calculate required_source_columns with source_columns modifications and swap them on exit
     required_source_columns = source_columns;
 
     RequiredSourceColumnsVisitor::Data columns_context;
-    columns_context.visit_index_hint = visit_index_hint;
     RequiredSourceColumnsVisitor(columns_context).visit(query);
 
     NameSet source_column_names;
@@ -1385,7 +1384,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
     result.window_function_asts = getWindowFunctions(query, *select_query);
     result.expressions_with_window_function = getExpressionsWithWindowFunctions(query);
 
-    result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key);
+    result.collectUsedColumns(query, true);
 
     if (!result.missed_subcolumns.empty())
     {
@@ -1422,7 +1421,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
             result.aggregates = getAggregates(query, *select_query);
             result.window_function_asts = getWindowFunctions(query, *select_query);
             result.expressions_with_window_function = getExpressionsWithWindowFunctions(query);
-            result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key);
+            result.collectUsedColumns(query, true);
         }
     }
 
@@ -1499,7 +1498,7 @@ TreeRewriterResultPtr TreeRewriter::analyze(
     else
         assertNoAggregates(query, "in wrong place");
 
-    bool is_ok = result.collectUsedColumns(query, false, settings.query_plan_optimize_primary_key, no_throw);
+    bool is_ok = result.collectUsedColumns(query, false, no_throw);
     if (!is_ok)
         return {};
 
diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h
index 1858488afa3..205b4760423 100644
--- a/src/Interpreters/TreeRewriter.h
+++ b/src/Interpreters/TreeRewriter.h
@@ -88,7 +88,7 @@ struct TreeRewriterResult
         bool add_special = true);
 
     void collectSourceColumns(bool add_special);
-    bool collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint, bool no_throw = false);
+    bool collectUsedColumns(const ASTPtr & query, bool is_select, bool no_throw = false);
     Names requiredSourceColumns() const { return required_source_columns.getNames(); }
     const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; }
     NameSet getArrayJoinSourceNameSet() const;
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index c5e42e76653..efb75f74415 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -436,7 +436,6 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
     AggregateProjectionCandidates candidates;
 
     const auto & parts = reading.getParts();
-    const auto & query_info = reading.getQueryInfo();
 
     const auto metadata = reading.getStorageMetadata();
     ContextPtr context = reading.getContext();
@@ -481,8 +480,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
             auto block = reading.getMergeTreeData().getMinMaxCountProjectionBlock(
                 metadata,
                 candidate.dag->getRequiredColumnsNames(),
-                dag.filter_node != nullptr,
-                query_info,
+                dag.dag,
                 parts,
                 max_added_blocks.get(),
                 context);
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index aa1c463e4e6..de87f041bf0 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -37,6 +37,8 @@
 #include <Common/JSONBuilder.h>
 #include <Common/isLocalAddress.h>
 #include <Common/logger_useful.h>
+#include "Storages/MergeTree/MergeTreeIndexAnnoy.h"
+#include "Storages/MergeTree/MergeTreeIndexUSearch.h"
 #include <Parsers/parseIdentifierOrStringLiteral.h>
 #include <Parsers/ExpressionListParsers.h>
 
@@ -1331,26 +1333,12 @@ static void buildIndexes(
     const Names & primary_key_column_names = primary_key.column_names;
 
     const auto & settings = context->getSettingsRef();
-    if (settings.query_plan_optimize_primary_key)
-    {
-        NameSet array_join_name_set;
-        if (query_info.syntax_analyzer_result)
-            array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet();
 
-        indexes.emplace(ReadFromMergeTree::Indexes{{
-            filter_actions_dag,
-            context,
-            primary_key_column_names,
-            primary_key.expression}, {}, {}, {}, {}, false, {}});
-    }
-    else
-    {
-        indexes.emplace(ReadFromMergeTree::Indexes{{
-            query_info,
-            context,
-            primary_key_column_names,
-            primary_key.expression}, {}, {}, {}, {}, false, {}});
-    }
+    indexes.emplace(ReadFromMergeTree::Indexes{{
+        filter_actions_dag,
+        context,
+        primary_key_column_names,
+        primary_key.expression}, {}, {}, {}, {}, false, {}});
 
     if (metadata_snapshot->hasPartitionKey())
     {
@@ -1363,11 +1351,7 @@ static void buildIndexes(
     }
 
     /// TODO Support row_policy_filter and additional_filters
-    if (settings.allow_experimental_analyzer)
-        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
-    else
-        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, query_info.query, context);
-
+    indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
     MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context);
 
     indexes->use_skip_indexes = settings.use_skip_indexes;
@@ -1434,7 +1418,19 @@ static void buildIndexes(
             }
             else
             {
-                auto condition = index_helper->createIndexCondition(*info, context);
+                MergeTreeIndexConditionPtr condition;
+                if (index_helper->isVectorSearch())
+                {
+                    if (const auto * annoy = typeid_cast<const MergeTreeIndexAnnoy *>(index_helper.get()))
+                        condition = annoy->createIndexCondition(*info, context);
+                    else if (const auto * usearch = typeid_cast<const MergeTreeIndexUSearch *>(index_helper.get()))
+                        condition = usearch->createIndexCondition(*info, context);
+                    else
+                        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name);
+                }
+                else
+                    condition = index_helper->createIndexCondition(filter_actions_dag, context);
+
                 if (!condition->alwaysUnknownOrTrue())
                     skip_indexes.useful_indices.emplace_back(index_helper, condition);
             }
@@ -1467,34 +1463,15 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
     Poco::Logger * log,
     std::optional<Indexes> & indexes)
 {
-    const auto & settings = context->getSettingsRef();
-    if (settings.allow_experimental_analyzer || settings.query_plan_optimize_primary_key)
-    {
-        auto updated_query_info_with_filter_dag = query_info;
-        updated_query_info_with_filter_dag.filter_actions_dag = buildFilterDAG(context, prewhere_info, added_filter_nodes, query_info);
-
-        return selectRangesToReadImpl(
-            std::move(parts),
-            std::move(alter_conversions),
-            metadata_snapshot_base,
-            metadata_snapshot,
-            updated_query_info_with_filter_dag,
-            context,
-            num_streams,
-            max_block_numbers_to_read,
-            data,
-            real_column_names,
-            sample_factor_column_queried,
-            log,
-            indexes);
-    }
+    auto updated_query_info_with_filter_dag = query_info;
+    updated_query_info_with_filter_dag.filter_actions_dag = buildFilterDAG(context, prewhere_info, added_filter_nodes, query_info);
 
     return selectRangesToReadImpl(
         std::move(parts),
         std::move(alter_conversions),
         metadata_snapshot_base,
         metadata_snapshot,
-        query_info,
+        updated_query_info_with_filter_dag,
         context,
         num_streams,
         max_block_numbers_to_read,
diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
index 798073f94d3..e7b170f0f91 100644
--- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
+++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
@@ -30,19 +30,9 @@ void ReadFromStorageStep::applyFilters()
     if (!context)
         return;
 
-    std::shared_ptr<const KeyCondition> key_condition;
-    if (!context->getSettingsRef().allow_experimental_analyzer)
-    {
-        for (const auto & processor : pipe.getProcessors())
-            if (auto * source = dynamic_cast<SourceWithKeyCondition *>(processor.get()))
-                source->setKeyCondition(query_info, context);
-    }
-    else
-    {
-        for (const auto & processor : pipe.getProcessors())
-            if (auto * source = dynamic_cast<SourceWithKeyCondition *>(processor.get()))
-                source->setKeyCondition(filter_nodes.nodes, context);
-    }
+    for (const auto & processor : pipe.getProcessors())
+        if (auto * source = dynamic_cast<SourceWithKeyCondition *>(processor.get()))
+            source->setKeyCondition(filter_nodes.nodes, context);
 }
 
 }
diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h
index 9e641cc8c51..82d46eb74a4 100644
--- a/src/Processors/SourceWithKeyCondition.h
+++ b/src/Processors/SourceWithKeyCondition.h
@@ -16,33 +16,18 @@ protected:
     /// Represents pushed down filters in source
     std::shared_ptr<const KeyCondition> key_condition;
 
-    void setKeyConditionImpl(const SelectQueryInfo & query_info, ContextPtr context, const Block & keys)
-    {
-        if (!context->getSettingsRef().allow_experimental_analyzer)
-        {
-            key_condition = std::make_shared<const KeyCondition>(
-                query_info,
-                context,
-                keys.getNames(),
-                std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
-        }
-    }
-
     void setKeyConditionImpl(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context, const Block & keys)
     {
-        if (context->getSettingsRef().allow_experimental_analyzer)
-        {
-            std::unordered_map<std::string, DB::ColumnWithTypeAndName> node_name_to_input_column;
-            for (const auto & column : keys.getColumnsWithTypeAndName())
-                node_name_to_input_column.insert({column.name, column});
+        std::unordered_map<std::string, DB::ColumnWithTypeAndName> node_name_to_input_column; /// Key columns indexed by name, handed to buildFilterActionsDAG below.
+        for (const auto & column : keys.getColumnsWithTypeAndName())
+            node_name_to_input_column.insert({column.name, column});
 
-            auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context);
-            key_condition = std::make_shared<const KeyCondition>(
-                filter_actions_dag,
-                context,
-                keys.getNames(),
-                std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
-        }
+        auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context); /// One filter DAG built from the given filter nodes.
+        key_condition = std::make_shared<const KeyCondition>(
+            filter_actions_dag,
+            context,
+            keys.getNames(),
+            std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName()))); /// Expression over a DAG made only of the key columns.
     }
 
 public:
@@ -52,10 +37,7 @@ public:
     /// Set key_condition directly. It is used for filter push down in source.
     virtual void setKeyCondition(const std::shared_ptr<const KeyCondition> & key_condition_) { key_condition = key_condition_; }
 
-    /// Set key_condition created by query_info and context. It is used for filter push down when allow_experimental_analyzer is false.
-    virtual void setKeyCondition(const SelectQueryInfo & /*query_info*/, ContextPtr /*context*/) { }
-
-    /// Set key_condition created by nodes and context. It is used for filter push down when allow_experimental_analyzer is true.
+    /// Set key_condition created by nodes and context.
     virtual void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & /*nodes*/, ContextPtr /*context*/) { }
 };
 }
diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp
index f03136e4edf..0c4e4f956a0 100644
--- a/src/Storages/Hive/StorageHive.cpp
+++ b/src/Storages/Hive/StorageHive.cpp
@@ -29,10 +29,14 @@
 #include <Parsers/ASTLiteral.h>
 #include <QueryPipeline/Pipe.h>
 #include <QueryPipeline/QueryPipeline.h>
+#include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Processors/ISource.h>
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
 #include <Processors/Transforms/AddingDefaultsTransform.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
+#include <Processors/Sources/NullSource.h>
 #include <Storages/AlterCommands.h>
 #include <Storages/HDFS/ReadBufferFromHDFS.h>
 #include <Storages/HDFS/AsynchronousReadBufferFromHDFS.h>
@@ -123,7 +127,6 @@ public:
         String compression_method_,
         Block sample_block_,
         ContextPtr context_,
-        const SelectQueryInfo & query_info_,
         UInt64 max_block_size_,
         const StorageHive & storage_,
         const Names & text_input_field_names_ = {})
@@ -140,7 +143,6 @@ public:
         , text_input_field_names(text_input_field_names_)
         , format_settings(getFormatSettings(getContext()))
         , read_settings(getContext()->getReadSettings())
-        , query_info(query_info_)
     {
         to_read_block = sample_block;
 
@@ -395,7 +397,6 @@ private:
     const Names & text_input_field_names;
     FormatSettings format_settings;
     ReadSettings read_settings;
-    SelectQueryInfo query_info;
 
     HiveFilePtr current_file;
     String current_path;
@@ -574,7 +575,7 @@ static HiveFilePtr createHiveFile(
 
 HiveFiles StorageHive::collectHiveFilesFromPartition(
     const Apache::Hadoop::Hive::Partition & partition,
-    const SelectQueryInfo & query_info,
+    const ActionsDAGPtr & filter_actions_dag,
     const HiveTableMetadataPtr & hive_table_metadata,
     const HDFSFSPtr & fs,
     const ContextPtr & context_,
@@ -638,7 +639,7 @@ HiveFiles StorageHive::collectHiveFilesFromPartition(
         for (size_t i = 0; i < partition_names.size(); ++i)
             ranges.emplace_back(fields[i]);
 
-        const KeyCondition partition_key_condition(query_info, getContext(), partition_names, partition_minmax_idx_expr);
+        const KeyCondition partition_key_condition(filter_actions_dag, getContext(), partition_names, partition_minmax_idx_expr);
         if (!partition_key_condition.checkInHyperrectangle(ranges, partition_types).can_be_true)
             return {};
     }
@@ -648,7 +649,7 @@ HiveFiles StorageHive::collectHiveFilesFromPartition(
     hive_files.reserve(file_infos.size());
     for (const auto & file_info : file_infos)
     {
-        auto hive_file = getHiveFileIfNeeded(file_info, fields, query_info, hive_table_metadata, context_, prune_level);
+        auto hive_file = getHiveFileIfNeeded(file_info, fields, filter_actions_dag, hive_table_metadata, context_, prune_level);
         if (hive_file)
         {
             LOG_TRACE(
@@ -672,7 +673,7 @@ StorageHive::listDirectory(const String & path, const HiveTableMetadataPtr & hiv
 HiveFilePtr StorageHive::getHiveFileIfNeeded(
     const FileInfo & file_info,
     const FieldVector & fields,
-    const SelectQueryInfo & query_info,
+    const ActionsDAGPtr & filter_actions_dag,
     const HiveTableMetadataPtr & hive_table_metadata,
     const ContextPtr & context_,
     PruneLevel prune_level) const
@@ -706,7 +707,7 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded(
 
     if (prune_level >= PruneLevel::File)
     {
-        const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr);
+        const KeyCondition hivefile_key_condition(filter_actions_dag, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr);
         if (hive_file->useFileMinMaxIndex())
         {
             /// Load file level minmax index and apply
@@ -758,10 +759,77 @@ bool StorageHive::supportsSubsetOfColumns() const
     return format_name == "Parquet" || format_name == "ORC";
 }
 
-Pipe StorageHive::read(
+class ReadFromHive : public SourceStepWithFilter /// Query plan step that StorageHive::read adds to read a Hive table.
+{
+public:
+    std::string getName() const override { return "ReadFromHive"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; /// Collects the Hive files if applyFilters() has not done so, then creates the sources.
+    void applyFilters() override; /// Builds a filter DAG from filter_nodes and collects the Hive files with it.
+
+    ReadFromHive(
+        Block header,
+        std::shared_ptr<StorageHive> storage_,
+        std::shared_ptr<StorageHiveSource::SourcesInfo> sources_info_,
+        HDFSBuilderWrapper builder_,
+        HDFSFSPtr fs_,
+        HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata_,
+        Block sample_block_,
+        Poco::Logger * log_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(header)})
+        , storage(std::move(storage_))
+        , sources_info(std::move(sources_info_))
+        , builder(std::move(builder_))
+        , fs(std::move(fs_))
+        , hive_table_metadata(std::move(hive_table_metadata_))
+        , sample_block(std::move(sample_block_))
+        , log(log_)
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::shared_ptr<StorageHive> storage; /// Owning pointer to the table; used for collectHiveFiles and the source settings.
+    std::shared_ptr<StorageHiveSource::SourcesInfo> sources_info; /// Receives the collected file list in initializePipeline.
+    HDFSBuilderWrapper builder;
+    HDFSFSPtr fs;
+    HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata;
+    Block sample_block;
+    Poco::Logger * log;
+
+    ContextPtr context;
+    size_t max_block_size;
+    size_t num_streams;
+
+    std::optional<HiveFiles> hive_files; /// Empty until createFiles() runs; createFiles() does nothing once it is set.
+
+    void createFiles(const ActionsDAGPtr & filter_actions_dag); /// Collects hive_files once; initializePipeline passes nullptr as the filter DAG.
+};
+
+void ReadFromHive::applyFilters()
+{
+    /// Collect the Hive files using a filter DAG built from the nodes gathered in filter_nodes.
+    createFiles(ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context));
+}
+
+void ReadFromHive::createFiles(const ActionsDAGPtr & filter_actions_dag)
+{
+    if (!hive_files) /// Collected at most once; later calls keep the first result.
+    {
+        hive_files = storage->collectHiveFiles(num_streams, filter_actions_dag, hive_table_metadata, fs, context);
+        LOG_INFO(log, "Collect {} hive files to read", hive_files->size());
+    }
+}
+
+void StorageHive::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
-    SelectQueryInfo & query_info,
+    SelectQueryInfo &,
     ContextPtr context_,
     QueryProcessingStage::Enum /* processed_stage */,
     size_t max_block_size,
@@ -774,15 +842,7 @@ Pipe StorageHive::read(
     auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url);
     auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table);
 
-    /// Collect Hive files to read
-    HiveFiles hive_files = collectHiveFiles(num_streams, query_info, hive_table_metadata, fs, context_);
-    LOG_INFO(log, "Collect {} hive files to read", hive_files.size());
-
-    if (hive_files.empty())
-        return {};
-
     auto sources_info = std::make_shared<StorageHiveSource::SourcesInfo>();
-    sources_info->hive_files = std::move(hive_files);
     sources_info->database_name = hive_database;
     sources_info->table_name = hive_table;
     sources_info->hive_metastore_client = hive_metastore_client;
@@ -822,6 +882,36 @@ Pipe StorageHive::read(
             sources_info->need_file_column = true;
     }
 
+    auto this_ptr = std::static_pointer_cast<StorageHive>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromHive>(
+        StorageHiveSource::getHeader(sample_block, sources_info),
+        std::move(this_ptr),
+        std::move(sources_info),
+        std::move(builder),
+        std::move(fs),
+        std::move(hive_table_metadata),
+        std::move(sample_block),
+        log,
+        context_,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromHive::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createFiles(nullptr);
+
+    if (hive_files->empty())
+    {
+        pipeline.init(Pipe(std::make_shared<NullSource>(getOutputStream().header)));
+        return;
+    }
+
+    sources_info->hive_files = std::move(*hive_files);
+
     if (num_streams > sources_info->hive_files.size())
         num_streams = sources_info->hive_files.size();
 
@@ -830,22 +920,29 @@ Pipe StorageHive::read(
     {
         pipes.emplace_back(std::make_shared<StorageHiveSource>(
             sources_info,
-            hdfs_namenode_url,
-            format_name,
-            compression_method,
+            storage->hdfs_namenode_url,
+            storage->format_name,
+            storage->compression_method,
             sample_block,
-            context_,
-            query_info,
+            context,
             max_block_size,
-            *this,
-            text_input_field_names));
+            *storage,
+            storage->text_input_field_names));
     }
-    return Pipe::unitePipes(std::move(pipes));
+
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(getOutputStream().header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 HiveFiles StorageHive::collectHiveFiles(
     size_t max_threads,
-    const SelectQueryInfo & query_info,
+    const ActionsDAGPtr & filter_actions_dag,
     const HiveTableMetadataPtr & hive_table_metadata,
     const HDFSFSPtr & fs,
     const ContextPtr & context_,
@@ -871,7 +968,7 @@ HiveFiles StorageHive::collectHiveFiles(
                 [&]()
                 {
                     auto hive_files_in_partition
-                        = collectHiveFilesFromPartition(partition, query_info, hive_table_metadata, fs, context_, prune_level);
+                        = collectHiveFilesFromPartition(partition, filter_actions_dag, hive_table_metadata, fs, context_, prune_level);
                     if (!hive_files_in_partition.empty())
                     {
                         std::lock_guard lock(hive_files_mutex);
@@ -897,7 +994,7 @@ HiveFiles StorageHive::collectHiveFiles(
             pool.scheduleOrThrowOnError(
                 [&]()
                 {
-                    auto hive_file = getHiveFileIfNeeded(file_info, {}, query_info, hive_table_metadata, context_, prune_level);
+                    auto hive_file = getHiveFileIfNeeded(file_info, {}, filter_actions_dag, hive_table_metadata, context_, prune_level);
                     if (hive_file)
                     {
                         std::lock_guard lock(hive_files_mutex);
@@ -925,13 +1022,12 @@ NamesAndTypesList StorageHive::getVirtuals() const
 std::optional<UInt64> StorageHive::totalRows(const Settings & settings) const
 {
-    /// query_info is not used when prune_level == PruneLevel::None
-    SelectQueryInfo query_info;
-    return totalRowsImpl(settings, query_info, getContext(), PruneLevel::None);
+    /// No filter DAG is passed: filter_actions_dag is not used when prune_level == PruneLevel::None
+    return totalRowsImpl(settings, nullptr, getContext(), PruneLevel::None);
 }
 
-std::optional<UInt64> StorageHive::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const
+std::optional<UInt64> StorageHive::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) const
 {
-    return totalRowsImpl(context_->getSettingsRef(), query_info, context_, PruneLevel::Partition);
+    return totalRowsImpl(context_->getSettingsRef(), filter_actions_dag, context_, PruneLevel::Partition); /// Counts rows with the filter DAG applied at PruneLevel::Partition.
 }
 
 void StorageHive::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const
@@ -946,7 +1042,7 @@ void StorageHive::checkAlterIsPossible(const AlterCommands & commands, ContextPt
 }
 
 std::optional<UInt64>
-StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const
+StorageHive::totalRowsImpl(const Settings & settings, const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const
 {
     /// Row-based format like Text doesn't support totalRowsByPartitionPredicate
     if (!supportsSubsetOfColumns())
@@ -958,7 +1054,7 @@ StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & qu
     HDFSFSPtr fs = createHDFSFS(builder.get());
     HiveFiles hive_files = collectHiveFiles(
         settings.max_threads,
-        query_info,
+        filter_actions_dag,
         hive_table_metadata,
         fs,
         context_,
diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h
index 8b378bf9e54..b0ec96604cc 100644
--- a/src/Storages/Hive/StorageHive.h
+++ b/src/Storages/Hive/StorageHive.h
@@ -42,10 +42,11 @@ public:
 
     bool supportsSubcolumns() const override { return true; }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
-        SelectQueryInfo & query_info,
+        SelectQueryInfo &,
         ContextPtr context,
         QueryProcessingStage::Enum processed_stage,
         size_t max_block_size,
@@ -58,9 +59,12 @@ public:
     bool supportsSubsetOfColumns() const;
 
     std::optional<UInt64> totalRows(const Settings & settings) const override;
-    std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const override;
+    std::optional<UInt64> totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) const override;
     void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override;
 
+protected:
+    friend class ReadFromHive;
+
 private:
     using FileFormat = IHiveFile::FileFormat;
     using FileInfo = HiveMetastoreClient::FileInfo;
@@ -88,7 +92,7 @@ private:
 
     HiveFiles collectHiveFiles(
         size_t max_threads,
-        const SelectQueryInfo & query_info,
+        const ActionsDAGPtr & filter_actions_dag,
         const HiveTableMetadataPtr & hive_table_metadata,
         const HDFSFSPtr & fs,
         const ContextPtr & context_,
@@ -96,7 +100,7 @@ private:
 
     HiveFiles collectHiveFilesFromPartition(
         const Apache::Hadoop::Hive::Partition & partition,
-        const SelectQueryInfo & query_info,
+        const ActionsDAGPtr & filter_actions_dag,
         const HiveTableMetadataPtr & hive_table_metadata,
         const HDFSFSPtr & fs,
         const ContextPtr & context_,
@@ -105,7 +109,7 @@ private:
     HiveFilePtr getHiveFileIfNeeded(
         const FileInfo & file_info,
         const FieldVector & fields,
-        const SelectQueryInfo & query_info,
+        const ActionsDAGPtr & filter_actions_dag,
         const HiveTableMetadataPtr & hive_table_metadata,
         const ContextPtr & context_,
         PruneLevel prune_level = PruneLevel::Max) const;
@@ -113,7 +117,7 @@ private:
     void lazyInitialize();
 
     std::optional<UInt64>
-    totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const;
+    totalRowsImpl(const Settings & settings, const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const;
 
     String hive_metastore_url;
 
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 1102c77ca58..4fa6bfdd617 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -669,7 +669,7 @@ public:
     virtual std::optional<UInt64> totalRows(const Settings &) const { return {}; }
 
     /// Same as above but also take partition predicate into account.
-    virtual std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo &, ContextPtr) const { return {}; }
+    virtual std::optional<UInt64> totalRowsByPartitionPredicate(const ActionsDAGPtr &, ContextPtr) const { return {}; }
 
     /// If it is possible to quickly determine exact number of bytes for the table on storage:
     /// - memory (approximated, resident)
diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h
index 980c248835d..e291eb7b98c 100644
--- a/src/Storages/MergeTree/KeyCondition.h
+++ b/src/Storages/MergeTree/KeyCondition.h
@@ -38,7 +38,7 @@ struct ActionDAGNodes;
   */
 class KeyCondition
 {
-public:
+private:
     /// Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters
     KeyCondition(
         const ASTPtr & query,
@@ -63,6 +63,7 @@ public:
         bool single_point_ = false,
         bool strict_ = false);
 
+public:
     /// Construct key condition from ActionsDAG nodes
     KeyCondition(
         ActionsDAGPtr filter_dag,
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 1c80778f1ca..80e0e430d19 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1075,26 +1075,25 @@ Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPar
 
 
 std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
-    const SelectQueryInfo & query_info, ContextPtr local_context, const DataPartsVector & parts) const
+    const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const
 {
     if (parts.empty())
         return 0u;
     auto metadata_snapshot = getInMemoryMetadataPtr();
-    ASTPtr expression_ast;
     Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */);
 
     // Generate valid expressions for filtering
-    bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, local_context, virtual_columns_block, expression_ast);
+    auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), virtual_columns_block);
 
-    PartitionPruner partition_pruner(metadata_snapshot, query_info, local_context, true /* strict */);
-    if (partition_pruner.isUseless() && !valid)
+    PartitionPruner partition_pruner(metadata_snapshot, filter_actions_dag, local_context, true /* strict */);
+    if (partition_pruner.isUseless() && !filter_dag)
         return {};
 
     std::unordered_set<String> part_values;
-    if (valid && expression_ast)
+    if (filter_dag)
     {
         virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */);
-        VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, local_context, expression_ast);
+        VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context);
         part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
         if (part_values.empty())
             return 0;
@@ -6617,8 +6616,7 @@ using PartitionIdToMaxBlock = std::unordered_map<String, Int64>;
 Block MergeTreeData::getMinMaxCountProjectionBlock(
     const StorageMetadataPtr & metadata_snapshot,
     const Names & required_columns,
-    bool has_filter,
-    const SelectQueryInfo & query_info,
+    const ActionsDAGPtr & filter_dag,
     const DataPartsVector & parts,
     const PartitionIdToMaxBlock * max_block_numbers_to_read,
     ContextPtr query_context) const
@@ -6668,7 +6666,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
     Block virtual_columns_block;
     auto virtual_block = getSampleBlockWithVirtualColumns();
     bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); });
-    if (has_virtual_column || has_filter)
+    if (has_virtual_column || filter_dag)
     {
         virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */);
         if (virtual_columns_block.rows() == 0)
@@ -6680,7 +6678,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
     std::optional<PartitionPruner> partition_pruner;
     std::optional<KeyCondition> minmax_idx_condition;
     DataTypes minmax_columns_types;
-    if (has_filter)
+    if (filter_dag)
     {
         if (metadata_snapshot->hasPartitionKey())
         {
@@ -6689,16 +6687,15 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
             minmax_columns_types = getMinMaxColumnsTypes(partition_key);
 
             minmax_idx_condition.emplace(
-                query_info, query_context, minmax_columns_names,
+                filter_dag, query_context, minmax_columns_names,
                 getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(query_context)));
-            partition_pruner.emplace(metadata_snapshot, query_info, query_context, false /* strict */);
+            partition_pruner.emplace(metadata_snapshot, filter_dag, query_context, false /* strict */);
         }
 
+        const auto * predicate = filter_dag->getOutputs().at(0);
+
         // Generate valid expressions for filtering
-        ASTPtr expression_ast;
-        VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, query_context, virtual_columns_block, expression_ast);
-        if (expression_ast)
-            VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, query_context, expression_ast);
+        VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context);
 
         rows = virtual_columns_block.rows();
         part_name_column = virtual_columns_block.getByName("_part").column;
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index dfa13eca11d..f0dbaf0e307 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -404,8 +404,7 @@ public:
     Block getMinMaxCountProjectionBlock(
         const StorageMetadataPtr & metadata_snapshot,
         const Names & required_columns,
-        bool has_filter,
-        const SelectQueryInfo & query_info,
+        const ActionsDAGPtr & filter_dag,
         const DataPartsVector & parts,
         const PartitionIdToMaxBlock * max_block_numbers_to_read,
         ContextPtr query_context) const;
@@ -1222,7 +1221,7 @@ protected:
         boost::iterator_range<DataPartIteratorByStateAndInfo> range, const ColumnsDescription & storage_columns);
 
     std::optional<UInt64> totalRowsByPartitionPredicateImpl(
-        const SelectQueryInfo & query_info, ContextPtr context, const DataPartsVector & parts) const;
+        const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const DataPartsVector & parts) const;
 
     static decltype(auto) getStateModifier(DataPartState state)
     {
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 7b30622a4fc..9c7e6933630 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -819,34 +819,6 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
     return VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
 }
 
-
-std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(
-    const MergeTreeData & data,
-    const MergeTreeData::DataPartsVector & parts,
-    const ASTPtr & query,
-    ContextPtr context)
-{
-    std::unordered_set<String> part_values;
-    ASTPtr expression_ast;
-    auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, true /* one_part */);
-
-    if (virtual_columns_block.rows() == 0)
-        return {};
-
-    // Generate valid expressions for filtering
-    VirtualColumnUtils::prepareFilterBlockWithQuery(query, context, virtual_columns_block, expression_ast);
-
-    // If there is still something left, fill the virtual block and do the filtering.
-    if (expression_ast)
-    {
-        virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, false /* one_part */);
-        VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, context, expression_ast);
-        return VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
-    }
-
-    return {};
-}
-
 void MergeTreeDataSelectExecutor::filterPartsByPartition(
     const std::optional<PartitionPruner> & partition_pruner,
     const std::optional<KeyCondition> & minmax_idx_condition,
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
index 11c8e172a4f..4c6e1086cbc 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
@@ -169,12 +169,6 @@ public:
     /// If possible, filter using expression on virtual columns.
     /// Example: SELECT count() FROM table WHERE _part = 'part_name'
     /// If expression found, return a set with allowed part names (std::nullopt otherwise).
-    static std::optional<std::unordered_set<String>> filterPartsByVirtualColumns(
-        const MergeTreeData & data,
-        const MergeTreeData::DataPartsVector & parts,
-        const ASTPtr & query,
-        ContextPtr context);
-
     static std::optional<std::unordered_set<String>> filterPartsByVirtualColumns(
         const MergeTreeData & data,
         const MergeTreeData::DataPartsVector & parts,
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index 4411d46e124..e36459b019f 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -23,6 +23,7 @@ namespace ErrorCodes
     extern const int INCORRECT_NUMBER_OF_COLUMNS;
     extern const int INCORRECT_QUERY;
     extern const int LOGICAL_ERROR;
+    extern const int NOT_IMPLEMENTED;
 }
 
 template <typename Distance>
@@ -331,6 +332,11 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const Selec
     return std::make_shared<MergeTreeIndexConditionAnnoy>(index, query, distance_function, context);
 };
 
+MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const ActionsDAGPtr &, ContextPtr) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); /// Always throws: the Annoy condition is built by the SelectQueryInfo overload instead.
+}
+
 MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index)
 {
     static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2;
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h
index dead12fe66f..d511ab84859 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h
@@ -88,7 +88,7 @@ private:
 };
 
 
-class MergeTreeIndexAnnoy : public IMergeTreeIndex
+class MergeTreeIndexAnnoy final : public IMergeTreeIndex
 {
 public:
 
@@ -98,7 +98,9 @@ public:
 
     MergeTreeIndexGranulePtr createIndexGranule() const override;
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
-    MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override;
+    MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const;
+    MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr &, ContextPtr) const override;
+    bool isVectorSearch() const override { return true; }
 
 private:
     const UInt64 trees;
diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp
index fa05f9e61e1..dbd33609a00 100644
--- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp
@@ -43,9 +43,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator(con
     return std::make_shared<MergeTreeIndexAggregatorBloomFilter>(bits_per_row, hash_functions, index.column_names);
 }
 
-MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, ContextPtr context) const
+MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const
 {
-    return std::make_shared<MergeTreeIndexConditionBloomFilter>(query_info, context, index.sample_block, hash_functions);
+    return std::make_shared<MergeTreeIndexConditionBloomFilter>(filter_actions_dag, context, index.sample_block, hash_functions);
 }
 
 static void assertIndexColumnsType(const Block & header)
diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h
index 4d688ae3cfc..d6f4d6f2cf5 100644
--- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h
+++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h
@@ -20,7 +20,7 @@ public:
 
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
 
-    MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, ContextPtr context) const override;
+    MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override;
 
 private:
     size_t bits_per_row;
diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp
index 398a85e92ac..da49814b83a 100644
--- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp
@@ -97,39 +97,18 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr &
 }
 
 MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter(
-    const SelectQueryInfo & info_, ContextPtr context_, const Block & header_, size_t hash_functions_)
-    : WithContext(context_), header(header_), query_info(info_), hash_functions(hash_functions_)
+    const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_)
+    : WithContext(context_), header(header_), hash_functions(hash_functions_)
 {
-    if (context_->getSettingsRef().allow_experimental_analyzer)
-    {
-        if (!query_info.filter_actions_dag)
-        {
-            rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
-            return;
-        }
-
-        RPNBuilder<RPNElement> builder(
-            query_info.filter_actions_dag->getOutputs().at(0),
-            context_,
-            [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
-        rpn = std::move(builder).extractRPN();
-        return;
-    }
-
-    ASTPtr filter_node = buildFilterNode(query_info.query);
-
-    if (!filter_node)
+    if (!filter_actions_dag)
     {
         rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
         return;
     }
 
-    auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context_);
     RPNBuilder<RPNElement> builder(
-        filter_node,
+        filter_actions_dag->getOutputs().at(0),
         context_,
-        std::move(block_with_constants),
-        query_info.prepared_sets,
         [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
     rpn = std::move(builder).extractRPN();
 }
diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h
index 952948fd582..db85c804d8d 100644
--- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h
+++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h
@@ -44,7 +44,7 @@ public:
         std::vector<std::pair<size_t, ColumnPtr>> predicate;
     };
 
-    MergeTreeIndexConditionBloomFilter(const SelectQueryInfo & info_, ContextPtr context_, const Block & header_, size_t hash_functions_);
+    MergeTreeIndexConditionBloomFilter(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_);
 
     bool alwaysUnknownOrTrue() const override;
 
@@ -58,7 +58,6 @@ public:
 
 private:
     const Block & header;
-    const SelectQueryInfo & query_info;
     const size_t hash_functions;
     std::vector<RPNElement> rpn;
 
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 6c1fff53109..e730f24bc34 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -137,7 +137,7 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos,
 }
 
 MergeTreeConditionFullText::MergeTreeConditionFullText(
-    const SelectQueryInfo & query_info,
+    const ActionsDAGPtr & filter_actions_dag,
     ContextPtr context,
     const Block & index_sample_block,
     const BloomFilterParameters & params_,
@@ -146,38 +146,16 @@ MergeTreeConditionFullText::MergeTreeConditionFullText(
     , index_data_types(index_sample_block.getNamesAndTypesList().getTypes())
     , params(params_)
     , token_extractor(token_extactor_)
-    , prepared_sets(query_info.prepared_sets)
 {
-    if (context->getSettingsRef().allow_experimental_analyzer)
-    {
-        if (!query_info.filter_actions_dag)
-        {
-            rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
-            return;
-        }
-
-        RPNBuilder<RPNElement> builder(
-            query_info.filter_actions_dag->getOutputs().at(0),
-            context,
-            [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
-        rpn = std::move(builder).extractRPN();
-        return;
-    }
-
-    ASTPtr filter_node = buildFilterNode(query_info.query);
-
-    if (!filter_node)
+    if (!filter_actions_dag)
     {
         rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
         return;
     }
 
-    auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);
     RPNBuilder<RPNElement> builder(
-        filter_node,
+        filter_actions_dag->getOutputs().at(0),
         context,
-        std::move(block_with_constants),
-        query_info.prepared_sets,
         [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
     rpn = std::move(builder).extractRPN();
 }
@@ -691,9 +669,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator(const
 }
 
 MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition(
-        const SelectQueryInfo & query, ContextPtr context) const
+        const ActionsDAGPtr & filter_dag, ContextPtr context) const
 {
-    return std::make_shared<MergeTreeConditionFullText>(query, context, index.sample_block, params, token_extractor.get());
+    return std::make_shared<MergeTreeConditionFullText>(filter_dag, context, index.sample_block, params, token_extractor.get());
 }
 
 MergeTreeIndexPtr bloomFilterIndexCreator(
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h
index 22f9215d563..58b88819027 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.h
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h
@@ -62,7 +62,7 @@ class MergeTreeConditionFullText final : public IMergeTreeIndexCondition
 {
 public:
     MergeTreeConditionFullText(
-            const SelectQueryInfo & query_info,
+            const ActionsDAGPtr & filter_actions_dag,
             ContextPtr context,
             const Block & index_sample_block,
             const BloomFilterParameters & params_,
@@ -143,9 +143,6 @@ private:
     BloomFilterParameters params;
     TokenExtractorPtr token_extractor;
     RPN rpn;
-
-    /// Sets from syntax analyzer.
-    PreparedSetsPtr prepared_sets;
 };
 
 class MergeTreeIndexFullText final : public IMergeTreeIndex
@@ -165,7 +162,7 @@ public:
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
 
     MergeTreeIndexConditionPtr createIndexCondition(
-            const SelectQueryInfo & query, ContextPtr context) const override;
+            const ActionsDAGPtr & filter_dag, ContextPtr context) const override;
 
     BloomFilterParameters params;
     /// Function for selecting next token.
diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp
index 818bae40067..0995e2724ec 100644
--- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp
@@ -79,7 +79,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator(cons
 }
 
 MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition(
-    const SelectQueryInfo &, ContextPtr) const
+    const ActionsDAGPtr &, ContextPtr) const
 {
     throw Exception(ErrorCodes::LOGICAL_ERROR, "Not supported");
 }
diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h
index 1cd0e3daf27..2296e1b717d 100644
--- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h
+++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h
@@ -70,7 +70,7 @@ public:
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
 
     MergeTreeIndexConditionPtr createIndexCondition(
-        const SelectQueryInfo & query, ContextPtr context) const override;
+        const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override;
 
     MergeTreeIndexMergedConditionPtr createIndexMergedCondition(
         const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const override;
diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp
index 5e2a034cb97..4c28fe8f00b 100644
--- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp
@@ -184,7 +184,7 @@ void MergeTreeIndexAggregatorInverted::update(const Block & block, size_t * pos,
 }
 
 MergeTreeConditionInverted::MergeTreeConditionInverted(
-    const SelectQueryInfo & query_info,
+    const ActionsDAGPtr & filter_actions_dag,
     ContextPtr context_,
     const Block & index_sample_block,
     const GinFilterParameters & params_,
@@ -192,41 +192,20 @@ MergeTreeConditionInverted::MergeTreeConditionInverted(
     :  WithContext(context_), header(index_sample_block)
     , params(params_)
     , token_extractor(token_extactor_)
-    , prepared_sets(query_info.prepared_sets)
 {
-    if (context_->getSettingsRef().allow_experimental_analyzer)
-    {
-        if (!query_info.filter_actions_dag)
-        {
-            rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
-            return;
-        }
-
-        rpn = std::move(
-                RPNBuilder<RPNElement>(
-                        query_info.filter_actions_dag->getOutputs().at(0), context_,
-                        [&](const RPNBuilderTreeNode & node, RPNElement & out)
-                        {
-                            return this->traverseAtomAST(node, out);
-                        }).extractRPN());
-        return;
-    }
-
-    ASTPtr filter_node = buildFilterNode(query_info.query);
-    if (!filter_node)
+    if (!filter_actions_dag)
     {
         rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
         return;
     }
 
-    auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context_);
-    RPNBuilder<RPNElement> builder(
-        filter_node,
-        context_,
-        std::move(block_with_constants),
-        query_info.prepared_sets,
-        [&](const RPNBuilderTreeNode & node, RPNElement & out) { return traverseAtomAST(node, out); });
-    rpn = std::move(builder).extractRPN();
+    rpn = std::move(
+            RPNBuilder<RPNElement>(
+                    filter_actions_dag->getOutputs().at(0), context_,
+                    [&](const RPNBuilderTreeNode & node, RPNElement & out)
+                    {
+                        return this->traverseAtomAST(node, out);
+                    }).extractRPN());
 }
 
 /// Keep in-sync with MergeTreeConditionFullText::alwaysUnknownOrTrue
@@ -721,9 +700,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexInverted::createIndexAggregatorForPart
 }
 
 MergeTreeIndexConditionPtr MergeTreeIndexInverted::createIndexCondition(
-        const SelectQueryInfo & query, ContextPtr context) const
+        const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const
 {
-    return std::make_shared<MergeTreeConditionInverted>(query, context, index.sample_block, params, token_extractor.get());
+    return std::make_shared<MergeTreeConditionInverted>(filter_actions_dag, context, index.sample_block, params, token_extractor.get());
 };
 
 MergeTreeIndexPtr invertedIndexCreator(
diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.h b/src/Storages/MergeTree/MergeTreeIndexInverted.h
index 413cf206f0e..807651d0c26 100644
--- a/src/Storages/MergeTree/MergeTreeIndexInverted.h
+++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h
@@ -64,7 +64,7 @@ class MergeTreeConditionInverted final : public IMergeTreeIndexCondition, WithCo
 {
 public:
     MergeTreeConditionInverted(
-            const SelectQueryInfo & query_info,
+            const ActionsDAGPtr & filter_actions_dag,
             ContextPtr context,
             const Block & index_sample_block,
             const GinFilterParameters & params_,
@@ -169,7 +169,7 @@ public:
     MergeTreeIndexGranulePtr createIndexGranule() const override;
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
     MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr & store, const MergeTreeWriterSettings & /*settings*/) const override;
-    MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override;
+    MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override;
 
     GinFilterParameters params;
     /// Function for selecting next token.
diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp
index 535fef45872..b1f8e09be9f 100644
--- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp
@@ -156,20 +156,17 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s
 namespace
 {
 
-KeyCondition buildCondition(const IndexDescription & index, const SelectQueryInfo & query_info, ContextPtr context)
+KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context)
 {
-    if (context->getSettingsRef().allow_experimental_analyzer)
-        return KeyCondition{query_info.filter_actions_dag, context, index.column_names, index.expression};
-
-    return KeyCondition{query_info, context, index.column_names, index.expression};
+    return KeyCondition{filter_actions_dag, context, index.column_names, index.expression};
 }
 
 }
 
 MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax(
-    const IndexDescription & index, const SelectQueryInfo & query_info, ContextPtr context)
+    const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context)
     : index_data_types(index.data_types)
-    , condition(buildCondition(index, query_info, context))
+    , condition(buildCondition(index, filter_actions_dag, context))
 {
 }
 
@@ -200,9 +197,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator(const Me
 }
 
 MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition(
-    const SelectQueryInfo & query, ContextPtr context) const
+    const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const
 {
-    return std::make_shared<MergeTreeIndexConditionMinMax>(index, query, context);
+    return std::make_shared<MergeTreeIndexConditionMinMax>(index, filter_actions_dag, context);
 }
 
 MergeTreeIndexFormat MergeTreeIndexMinMax::getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & relative_path_prefix) const
diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h
index a1a216fdf72..1e2abe6983f 100644
--- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h
+++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h
@@ -52,7 +52,7 @@ class MergeTreeIndexConditionMinMax final : public IMergeTreeIndexCondition
 public:
     MergeTreeIndexConditionMinMax(
         const IndexDescription & index,
-        const SelectQueryInfo & query_info,
+        const ActionsDAGPtr & filter_actions_dag,
         ContextPtr context);
 
     bool alwaysUnknownOrTrue() const override;
@@ -79,7 +79,7 @@ public:
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
 
     MergeTreeIndexConditionPtr createIndexCondition(
-        const SelectQueryInfo & query, ContextPtr context) const override;
+        const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override;
 
     const char* getSerializedFileExtension() const override { return ".idx2"; }
     MergeTreeIndexFormat getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & path_prefix) const override; /// NOLINT
diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
index 612c5d868cb..831856f8085 100644
--- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
@@ -247,7 +247,7 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
     const String & index_name_,
     const Block & index_sample_block,
     size_t max_rows_,
-    const SelectQueryInfo & query_info,
+    const ActionsDAGPtr & filter_dag,
     ContextPtr context)
     : index_name(index_name_)
     , max_rows(max_rows_)
@@ -256,42 +256,20 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
         if (!key_columns.contains(name))
             key_columns.insert(name);
 
-    if (context->getSettingsRef().allow_experimental_analyzer)
-    {
-        if (!query_info.filter_actions_dag)
-            return;
+    if (!filter_dag)
+        return;
 
-        if (checkDAGUseless(*query_info.filter_actions_dag->getOutputs().at(0), context))
-            return;
+    if (checkDAGUseless(*filter_dag->getOutputs().at(0), context))
+        return;
 
-        const auto * filter_node = query_info.filter_actions_dag->getOutputs().at(0);
-        auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG({filter_node}, {}, context);
-        const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0);
+    auto filter_actions_dag = filter_dag->clone();
+    const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0);
 
-        std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> node_to_result_node;
-        filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node);
+    std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> node_to_result_node;
+    filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node);
 
-        filter_actions_dag->removeUnusedActions();
-        actions = std::make_shared<ExpressionActions>(filter_actions_dag);
-    }
-    else
-    {
-        ASTPtr ast_filter_node = buildFilterNode(query_info.query);
-        if (!ast_filter_node)
-            return;
-
-        if (checkASTUseless(ast_filter_node))
-            return;
-
-        auto expression_ast = ast_filter_node->clone();
-
-        /// Replace logical functions with bit functions.
-        /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
-        traverseAST(expression_ast);
-
-        auto syntax_analyzer_result = TreeRewriter(context).analyze(expression_ast, index_sample_block.getNamesAndTypesList());
-        actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
-    }
+    filter_actions_dag->removeUnusedActions();
+    actions = std::make_shared<ExpressionActions>(filter_actions_dag);
 }
 
 bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const
@@ -704,9 +682,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge
 }
 
 MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition(
-    const SelectQueryInfo & query, ContextPtr context) const
+    const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const
 {
-    return std::make_shared<MergeTreeIndexConditionSet>(index.name, index.sample_block, max_rows, query, context);
+    return std::make_shared<MergeTreeIndexConditionSet>(index.name, index.sample_block, max_rows, filter_actions_dag, context);
 }
 
 MergeTreeIndexPtr setIndexCreator(const IndexDescription & index)
diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h
index a53476ca751..7e60e5d6dc2 100644
--- a/src/Storages/MergeTree/MergeTreeIndexSet.h
+++ b/src/Storages/MergeTree/MergeTreeIndexSet.h
@@ -87,7 +87,7 @@ public:
         const String & index_name_,
         const Block & index_sample_block,
         size_t max_rows_,
-        const SelectQueryInfo & query_info,
+        const ActionsDAGPtr & filter_actions_dag,
         ContextPtr context);
 
     bool alwaysUnknownOrTrue() const override;
@@ -149,7 +149,7 @@ public:
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
 
     MergeTreeIndexConditionPtr createIndexCondition(
-            const SelectQueryInfo & query, ContextPtr context) const override;
+            const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override;
 
     size_t max_rows = 0;
 };
diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
index dc8ed368011..c9df7210569 100644
--- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
@@ -36,6 +36,7 @@ namespace ErrorCodes
     extern const int INCORRECT_NUMBER_OF_COLUMNS;
     extern const int INCORRECT_QUERY;
     extern const int LOGICAL_ERROR;
+    extern const int NOT_IMPLEMENTED;
 }
 
 namespace
@@ -366,6 +367,11 @@ MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const Sel
     return std::make_shared<MergeTreeIndexConditionUSearch>(index, query, distance_function, context);
 };
 
+MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const ActionsDAGPtr &, ContextPtr) const /// unsupported overload, always throws
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexUSearch cannot be created with ActionsDAG");
+}
+
 MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index)
 {
     static constexpr auto default_distance_function = DISTANCE_FUNCTION_L2;
diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h
index a7675620a2e..5107cfee371 100644
--- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h
+++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h
@@ -100,7 +100,9 @@ public:
 
     MergeTreeIndexGranulePtr createIndexGranule() const override;
     MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override;
-    MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override;
+    MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const;
+    MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr &, ContextPtr) const override;
+    bool isVectorSearch() const override { return true; }
 
 private:
     const String distance_function;
diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h
index da1e914b90e..4749470bedd 100644
--- a/src/Storages/MergeTree/MergeTreeIndices.h
+++ b/src/Storages/MergeTree/MergeTreeIndices.h
@@ -170,7 +170,9 @@ struct IMergeTreeIndex
     }
 
     virtual MergeTreeIndexConditionPtr createIndexCondition(
-        const SelectQueryInfo & query_info, ContextPtr context) const = 0;
+        const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const = 0;
+
+    virtual bool isVectorSearch() const { return false; }
 
     virtual MergeTreeIndexMergedConditionPtr createIndexMergedCondition(
         const SelectQueryInfo & /*query_info*/, StorageMetadataPtr /*storage_metadata*/) const
diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp
index c559ba4371a..668576f9021 100644
--- a/src/Storages/MergeTree/PartitionPruner.cpp
+++ b/src/Storages/MergeTree/PartitionPruner.cpp
@@ -9,10 +9,7 @@ namespace
 
 KeyCondition buildKeyCondition(const KeyDescription & partition_key, const SelectQueryInfo & query_info, ContextPtr context, bool strict)
 {
-    if (context->getSettingsRef().allow_experimental_analyzer)
-        return {query_info.filter_actions_dag, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict};
-
-    return {query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict};
+    return {query_info.filter_actions_dag, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict};
 }
 
 }
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 25bb6691ff6..7a73e443f9a 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1058,11 +1058,6 @@ StorageFileSource::~StorageFileSource()
     beforeDestroy();
 }
 
-void StorageFileSource::setKeyCondition(const SelectQueryInfo & query_info_, ContextPtr context_)
-{
-    setKeyConditionImpl(query_info_, context_, block_for_format);
-}
-
 void StorageFileSource::setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_)
 {
     setKeyConditionImpl(nodes, context_, block_for_format);
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index 1fd3f2e0edf..5cf0215b691 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -256,8 +256,6 @@ private:
         return storage->getName();
     }
 
-    void setKeyCondition(const SelectQueryInfo & query_info_, ContextPtr context_) override;
-
     void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override;
 
     bool tryGetCountFromCache(const struct stat & file_stat);
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index e7ca50f4a5c..b8804ad3c6d 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -262,10 +262,10 @@ std::optional<UInt64> StorageMergeTree::totalRows(const Settings &) const
     return getTotalActiveSizeInRows();
 }
 
-std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr local_context) const
+std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context) const
 {
     auto parts = getVisibleDataPartsVector(local_context);
-    return totalRowsByPartitionPredicateImpl(query_info, local_context, parts);
+    return totalRowsByPartitionPredicateImpl(filter_actions_dag, local_context, parts);
 }
 
 std::optional<UInt64> StorageMergeTree::totalBytes(const Settings &) const
diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h
index b2829ecb17f..51bf6aa42e7 100644
--- a/src/Storages/StorageMergeTree.h
+++ b/src/Storages/StorageMergeTree.h
@@ -66,7 +66,7 @@ public:
         size_t num_streams) override;
 
     std::optional<UInt64> totalRows(const Settings &) const override;
-    std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo &, ContextPtr) const override;
+    std::optional<UInt64> totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr) const override;
     std::optional<UInt64> totalBytes(const Settings &) const override;
     std::optional<UInt64> totalBytesUncompressed(const Settings &) const override;
 
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index eefcab01236..5a7e88ba6ba 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -5453,11 +5453,11 @@ std::optional<UInt64> StorageReplicatedMergeTree::totalRows(const Settings & set
     return res;
 }
 
-std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr local_context) const
+std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context) const
 {
     DataPartsVector parts;
     foreachActiveParts([&](auto & part) { parts.push_back(part); }, local_context->getSettingsRef().select_sequential_consistency);
-    return totalRowsByPartitionPredicateImpl(query_info, local_context, parts);
+    return totalRowsByPartitionPredicateImpl(filter_actions_dag, local_context, parts);
 }
 
 std::optional<UInt64> StorageReplicatedMergeTree::totalBytes(const Settings & settings) const
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index 556d23d6903..2bd1fcbc693 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -163,7 +163,7 @@ public:
         size_t num_streams) override;
 
     std::optional<UInt64> totalRows(const Settings & settings) const override;
-    std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context) const override;
+    std::optional<UInt64> totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override;
     std::optional<UInt64> totalBytes(const Settings & settings) const override;
     std::optional<UInt64> totalBytesUncompressed(const Settings & settings) const override;
 
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index 07d965d8bb3..0ad7f950421 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -152,11 +152,6 @@ public:
 
     String getName() const override;
 
-    void setKeyCondition(const SelectQueryInfo & query_info_, ContextPtr context_) override
-    {
-        setKeyConditionImpl(query_info_, context_, sample_block);
-    }
-
     void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override
     {
         setKeyConditionImpl(nodes, context_, sample_block);
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h
index 8d027025882..f2b84a88906 100644
--- a/src/Storages/StorageURL.h
+++ b/src/Storages/StorageURL.h
@@ -170,11 +170,6 @@ public:
 
     String getName() const override { return name; }
 
-    void setKeyCondition(const SelectQueryInfo & query_info_, ContextPtr context_) override
-    {
-        setKeyConditionImpl(query_info_, context_, block_for_format);
-    }
-
     void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override
     {
         setKeyConditionImpl(nodes, context_, block_for_format);

From f652c6050cc59a75ab4f1e6e701647761084efc9 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 3 Jan 2024 17:35:37 +0000
Subject: [PATCH 134/204] Implement idnaEn/Decode()

---
 .../functions/string-functions.md             |  74 +++++++-
 src/Functions/punycode.cpp                    | 177 +++++++++++++++++-
 .../queries/0_stateless/02932_idna.reference  |  75 ++++++++
 tests/queries/0_stateless/02932_idna.sql      | 123 ++++++++++++
 .../0_stateless/02932_punycode.reference      |   8 +-
 tests/queries/0_stateless/02932_punycode.sql  |  80 ++++----
 6 files changed, 489 insertions(+), 48 deletions(-)
 create mode 100644 tests/queries/0_stateless/02932_idna.reference
 create mode 100644 tests/queries/0_stateless/02932_idna.sql

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index eb5cb29c502..668592a6fd9 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -1385,7 +1385,7 @@ Result:
 
 ## punycodeEncode
 
-Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string.
+Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) representation of a string.
 The string must be UTF8-encoded, otherwise an exception is thrown.
 
 **Syntax**
@@ -1457,6 +1457,78 @@ Result:
 
 Like `punycodeDecode` but returns `NULL` in case of an error instead of throwing an exception.
 
+## idnaEncode
+
+Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
+The input string must be UTF-8-encoded and translatable to an ASCII string, otherwise an exception is thrown.
+Note: No percent decoding or trimming of tabs, spaces or control characters is performed.
+
+**Syntax**
+
+```sql
+idnaEncode(val)
+```
+
+**Arguments**
+
+- `val` - Input value. [String](../data-types/string.md)
+
+**Returned value**
+
+- An ASCII representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
+
+**Example**
+
+``` sql
+select idnaEncode('straße.münchen.de');
+```
+
+Result:
+
+```result
+┌─idnaEncode('straße.münchen.de')─────┐
+│ xn--strae-oqa.xn--mnchen-3ya.de     │
+└─────────────────────────────────────┘
+```
+
+## idnaEncodeOrNull
+
+Like `idnaEncode` but returns `NULL` in case of an error instead of throwing an exception.
+
+## idnaDecode
+
+Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
+In case of an error (e.g. because the input is invalid), the input string is returned.
+Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization.
+
+**Syntax**
+
+```sql
+idnaDecode(val)
+```
+
+**Arguments**
+
+- `val` - Input value. [String](../data-types/string.md)
+
+**Returned value**
+
+- A Unicode (UTF-8) representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
+
+**Example**
+
+``` sql
+select idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de');
+```
+
+Result:
+
+```result
+┌─idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de')─┐
+│ straße.münchen.de                             │
+└───────────────────────────────────────────────┘
+```
+
 ## byteHammingDistance
 
 Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp
index 869b79daa10..d015b6212c8 100644
--- a/src/Functions/punycode.cpp
+++ b/src/Functions/punycode.cpp
@@ -15,6 +15,8 @@
 #    pragma clang diagnostic ignored "-Wnewline-eof"
 #endif
 #    include <ada/idna/punycode.h>
+#    include <ada/idna/to_ascii.h>
+#    include <ada/idna/to_unicode.h>
 #    include <ada/idna/unicode_transcoding.h>
 #ifdef __clang__
 #    pragma clang diagnostic pop
@@ -38,14 +40,23 @@ enum class ErrorHandling
     Null
 };
 
+/// Implementation of
+/// - punycode(En|De)code[OrNull](), see [1]
+/// - idna(En|De)code[OrNull](), see [2, 3]
+///
+/// [1] https://en.wikipedia.org/wiki/Punycode
+/// [2] https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
+/// [3] https://www.unicode.org/reports/tr46/#ToUnicode
 
+
+/// Kind of similar to FunctionStringToString but accepts String arguments only, and supports `OrNull` overloads
 template <typename Impl>
-class FunctionPunycode : public IFunction
+class FunctionIdnaPunycodeBase : public IFunction
 {
 public:
     static constexpr auto name = Impl::name;
 
-    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionPunycode<Impl>>(); }
+    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionIdnaPunycodeBase<Impl>>(); }
     String getName() const override { return name; }
     size_t getNumberOfArguments() const override { return 1; }
     bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
@@ -73,7 +84,9 @@ public:
         if (const ColumnString * col = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
             Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_res_null);
         else
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
+            throw Exception(
+                ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal column {} of argument of function {}",
                 arguments[0].column->getName(), getName());
 
         if constexpr (Impl::error_handling == ErrorHandling::Null)
@@ -198,11 +211,111 @@ struct PunycodeDecodeImpl
     }
 };
 
+
+/// Translates a UTF-8 string (typically an Internationalized Domain Name for Applications, IDNA) to an ASCII-encoded equivalent. The
+/// encoding is performed per domain component and based on Punycode with ASCII Compatible Encoding (ACE) prefix "xn--".
+/// Example: "straße.münchen.de" --> "xn--strae-oqa.xn--mnchen-3ya.de"
+/// Note: doesn't do percent decoding. Doesn't trim tabs, spaces or control characters. Empty inputs are returned as empty strings.
+template <ErrorHandling error_handling_>
+struct IdnaEncodeImpl
+{
+    static constexpr auto error_handling = error_handling_;
+    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "idnaEncodeOrNull" : "idnaEncode";
+
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets,
+        ColumnUInt8::MutablePtr & col_res_null)
+    {
+        const size_t rows = offsets.size();
+        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
+        res_offsets.reserve(rows);
+        if constexpr (error_handling == ErrorHandling::Null)
+            col_res_null = ColumnUInt8::create(rows, 0); /// null map, initially "not NULL" for every row
+
+        size_t prev_offset = 0;
+        std::string ascii; /// reused across rows, always empty at the start of an iteration
+        for (size_t row = 0; row < rows; ++row)
+        {
+            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
+            const size_t value_length = offsets[row] - prev_offset - 1; /// exclude the terminating zero byte
+            std::string_view value_view(value, value_length);
+
+            if (!value_view.empty()) /// to_ascii() expects non-empty input
+            {
+                ascii = ada::idna::to_ascii(value_view);
+                const bool ok = !ascii.empty(); /// an empty result for a non-empty input signals a conversion failure
+                if (!ok)
+                {
+                    if constexpr (error_handling == ErrorHandling::Throw)
+                    {
+                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view);
+                    }
+                    else
+                    {
+                        /// ascii is already empty here: the row is written as an empty string and flagged as NULL
+                        col_res_null->getData()[row] = 1;
+                    }
+                }
+            }
+
+            res_data.insert(ascii.c_str(), ascii.c_str() + ascii.size() + 1); /// copy including the terminating zero byte
+            res_offsets.push_back(res_data.size());
+
+            prev_offset = offsets[row];
+
+            ascii.clear();
+        }
+    }
+};
+
+
+/// Translates an ASCII-encoded IDNA string back to its Unicode (UTF-8) form via ada::idna::to_unicode(), row by row.
+/// As per the specification, invalid inputs are returned as is, therefore this struct implements no error handling.
+struct IdnaDecodeImpl
+{
+    static constexpr auto error_handling = ErrorHandling::Throw; /// dummy: no OrNull variant of idnaDecode is registered
+    static constexpr auto name = "idnaDecode";
+
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets,
+        ColumnUInt8::MutablePtr & /*col_res_null*/)
+    {
+        const size_t rows = offsets.size();
+        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
+        res_offsets.reserve(rows);
+
+        size_t prev_offset = 0;
+        std::string unicode; /// reused across rows
+        for (size_t row = 0; row < rows; ++row)
+        {
+            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
+            const size_t value_length = offsets[row] - prev_offset - 1; /// exclude the terminating zero byte
+            std::string_view value_view(value, value_length);
+
+            unicode = ada::idna::to_unicode(value_view);
+
+            res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1); /// copy including the terminating zero byte
+            res_offsets.push_back(res_data.size());
+
+            prev_offset = offsets[row];
+
+            unicode.clear();
+        }
+    }
+};
+
+
 }
 
 REGISTER_FUNCTION(Punycode)
 {
-    factory.registerFunction<FunctionPunycode<PunycodeEncodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
+    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeEncodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Throws an exception in case of error.)",
         .syntax="punycodeEncode(str)",
@@ -219,7 +332,7 @@ Computes a Punycode representation of a string. Throws an exception in case of e
             }}
     });
 
-    factory.registerFunction<FunctionPunycode<PunycodeEncodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
+    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeEncodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Returns NULL in case of error)",
         .syntax="punycodeEncode(str)",
@@ -236,7 +349,7 @@ Computes a Punycode representation of a string. Returns NULL in case of error)",
             }}
     });
 
-    factory.registerFunction<FunctionPunycode<PunycodeDecodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
+    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeDecodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Throws an exception in case of error.)",
         .syntax="punycodeDecode(str)",
@@ -253,7 +366,7 @@ Computes a Punycode representation of a string. Throws an exception in case of e
             }}
     });
 
-    factory.registerFunction<FunctionPunycode<PunycodeDecodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
+    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeDecodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
         .description=R"(
 Computes a Punycode representation of a string. Returns NULL in case of error)",
         .syntax="punycodeDecode(str)",
@@ -270,6 +383,56 @@ Computes a Punycode representation of a string. Returns NULL in case of error)",
             }}
     });
 
+    factory.registerFunction<FunctionIdnaPunycodeBase<IdnaEncodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
+        .description=R"(
+Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
+        .syntax="idnaEncode(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT idnaEncode('straße.münchen.de') AS ascii;",
+            R"(
+┌─ascii───────────────────────────┐
+│ xn--strae-oqa.xn--mnchen-3ya.de │
+└─────────────────────────────────┘
+            )"
+            }}
+    });
+
+    factory.registerFunction<FunctionIdnaPunycodeBase<IdnaEncodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
+        .description=R"(
+Computes an ASCII representation of an Internationalized Domain Name. Returns NULL in case of error)",
+        .syntax="idnaEncodeOrNull(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT idnaEncodeOrNull('straße.münchen.de') AS ascii;",
+            R"(
+┌─ascii───────────────────────────┐
+│ xn--strae-oqa.xn--mnchen-3ya.de │
+└─────────────────────────────────┘
+            )"
+            }}
+    });
+
+    factory.registerFunction<FunctionIdnaPunycodeBase<IdnaDecodeImpl>>(FunctionDocumentation{
+        .description=R"(
+Computes a Unicode representation of an Internationalized Domain Name.)",
+        .syntax="idnaDecode(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="A Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de') AS unicode;",
+            R"(
+┌─unicode───────────┐
+│ straße.münchen.de │
+└───────────────────┘
+            )"
+            }}
+    });
 }
 
 }
diff --git a/tests/queries/0_stateless/02932_idna.reference b/tests/queries/0_stateless/02932_idna.reference
new file mode 100644
index 00000000000..8bfba53ad11
--- /dev/null
+++ b/tests/queries/0_stateless/02932_idna.reference
@@ -0,0 +1,75 @@
+-- Negative tests
+-- Regular cases
+straße.de	xn--strae-oqa.de	straße.de	xn--strae-oqa.de	straße.de
+2001:4860:4860::8888	2001:4860:4860::8888	2001:4860:4860::8888	2001:4860:4860::8888	2001:4860:4860::8888
+AMAZON	amazon	amazon	amazon	amazon
+aa--	aa--	aa--	aa--	aa--
+a†--	xn--a---kp0a	a†--	xn--a---kp0a	a†--
+ab--c	ab--c	ab--c	ab--c	ab--c
+-†	xn----xhn	-†	xn----xhn	-†
+-x.xn--zca	-x.xn--zca	-x.ß	-x.xn--zca	-x.ß
+x-.xn--zca	x-.xn--zca	x-.ß	x-.xn--zca	x-.ß
+x-.ß	x-.xn--zca	x-.ß	x-.xn--zca	x-.ß
+x..ß	x..xn--zca	x..ß	x..xn--zca	x..ß
+128.0,0.1	128.0,0.1	128.0,0.1	128.0,0.1	128.0,0.1
+xn--zca.xn--zca	xn--zca.xn--zca	ß.ß	xn--zca.xn--zca	ß.ß
+xn--zca.ß	xn--zca.xn--zca	ß.ß	xn--zca.xn--zca	ß.ß
+x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x
+x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß
+x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß
+01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x
+≠	xn--1ch	≠	xn--1ch	≠
+aa--	aa--	aa--	aa--
+ab--c	ab--c	ab--c	ab--c
+-x	-x	-x	-x
+			
+xn--1ch	≠	xn--1ch	xn--1ch
+xn--dqd20apc	ᄎᆞᆷ	xn--dqd20apc	xn--dqd20apc
+xn--gdh	≮	xn--gdh	xn--gdh
+xn--80aaa0ahbbeh4c	йайзаакпий	xn--80aaa0ahbbeh4c	xn--80aaa0ahbbeh4c
+xn--3bs854c	团淄	xn--3bs854c	xn--3bs854c
+xn--mgb9awbf	عمان	xn--mgb9awbf	xn--mgb9awbf
+xn--mgbaam7a8h	امارات	xn--mgbaam7a8h	xn--mgbaam7a8h
+xn--mgbbh1a71e	بھارت	xn--mgbbh1a71e	xn--mgbbh1a71e
+xn--s7y.com	短.com	xn--s7y.com	xn--s7y.com
+xn--55qx5d.xn--tckwe	公司.コム	xn--55qx5d.xn--tckwe	xn--55qx5d.xn--tckwe
+xn--4dbrk0ce	ישראל	xn--4dbrk0ce	xn--4dbrk0ce
+xn--zckzah	テスト	xn--zckzah	xn--zckzah
+xn--p1ai.com	рф.com	xn--p1ai.com	xn--p1ai.com
+xn--mxahbxey0c.gr	εχαμπλε.gr	xn--mxahbxey0c.gr	xn--mxahbxey0c.gr
+xn--h2brj9c	भारत	xn--h2brj9c	xn--h2brj9c
+xn--d1acpjx3f.xn--p1ai	яндекс.рф	xn--d1acpjx3f.xn--p1ai	xn--d1acpjx3f.xn--p1ai
+xn--q9jyb4c	みんな	xn--q9jyb4c	xn--q9jyb4c
+xn--sterreich-z7a.at	österreich.at	xn--sterreich-z7a.at	xn--sterreich-z7a.at
+xn--h2breg3eve.xn--h2brj9c	भारतम्.भारत	xn--h2breg3eve.xn--h2brj9c	xn--h2breg3eve.xn--h2brj9c
+ejemplo.xn--q9jyb4c	ejemplo.みんな	ejemplo.xn--q9jyb4c	ejemplo.xn--q9jyb4c
+xn--9t4b11yi5a.com	테스트.com	xn--9t4b11yi5a.com	xn--9t4b11yi5a.com
+xn--gk3at1e.com	通販.com	xn--gk3at1e.com	xn--gk3at1e.com
+xn--42c2d9a	คอม	xn--42c2d9a	xn--42c2d9a
+1xn--	1xn--	1xn--	1xn--
+xn--bih.com	⌘.com	xn--bih.com	xn--bih.com
+xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c	موقع.وزارة-الأتصالات.مصر	xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c	xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c
+xn--mgbb9fbpob	موبايلي	xn--mgbb9fbpob	xn--mgbb9fbpob
+xn--55qw42g.xn--55qw42g	公益.公益	xn--55qw42g.xn--55qw42g	xn--55qw42g.xn--55qw42g
+≠	≠	xn--1ch	xn--1ch
+ファッション.biz	ファッション.biz	xn--bck1b9a5dre4c.biz	xn--bck1b9a5dre4c.biz
+-- Special cases
+
+
+
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+				
+münchen	xn--mnchen-3ya	münchen	xn--mnchen-3ya	münchen
+straße.münchen.de	xn--strae-oqa.xn--mnchen-3ya.de	straße.münchen.de	xn--strae-oqa.xn--mnchen-3ya.de	straße.münchen.de
+london.co.uk	london.co.uk
+microsoft.com	microsoft.com
+straße.münchen.de	xn--strae-oqa.xn--mnchen-3ya.de
+xn--	\N
+xn--	\N
+xn--tešla	\N
diff --git a/tests/queries/0_stateless/02932_idna.sql b/tests/queries/0_stateless/02932_idna.sql
new file mode 100644
index 00000000000..3572d4a6aec
--- /dev/null
+++ b/tests/queries/0_stateless/02932_idna.sql
@@ -0,0 +1,123 @@
+-- Tags: no-fasttest
+-- no-fasttest: requires idna library
+
+-- See also 02932_punycode.sql
+
+SELECT '-- Negative tests';
+
+SELECT idnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT idnaEncodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT idnaDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+
+SELECT idnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT idnaEncodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT idnaDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+
+SELECT idnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT idnaEncodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT idnaDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+
+SELECT idnaEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT idnaEncodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT idnaDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+
+SELECT '-- Regular cases';
+
+-- The test cases originate from the ada idna unit tests:
+-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_alternating.txt
+-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_unicode_alternating.txt
+
+SELECT 'straße.de' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT '2001:4860:4860::8888' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'AMAZON' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'aa--' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'a†--' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'ab--c' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT '-†' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT '-x.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'x-.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'x-.ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'x..ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT '128.0,0.1' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'xn--zca.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'xn--zca.ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'x01234567890123456789012345678901234567890123456789012345678901x' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT '01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+SELECT '≠' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
+
+SELECT 'aa--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'ab--c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT '-x' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--1ch' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--dqd20apc' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--gdh' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--80aaa0ahbbeh4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--3bs854c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--mgb9awbf' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--mgbaam7a8h' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--mgbbh1a71e' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--s7y.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--55qx5d.xn--tckwe' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--4dbrk0ce' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--zckzah' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--p1ai.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--mxahbxey0c.gr' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--d1acpjx3f.xn--p1ai' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--sterreich-z7a.at' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--h2breg3eve.xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'ejemplo.xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--9t4b11yi5a.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--gk3at1e.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--42c2d9a' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT '1xn--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--bih.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--mgbb9fbpob' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'xn--55qw42g.xn--55qw42g' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT '≠' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+SELECT 'ファッション.biz' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
+
+SELECT '-- Special cases';
+
+SELECT idnaEncode('');
+SELECT idnaEncodeOrNull('');
+SELECT idnaDecode('');
+
+SELECT idnaEncode(NULL);
+SELECT idnaEncodeOrNull(NULL);
+SELECT idnaDecode(NULL);
+
+-- Garbage IDNA/Unicode values that cannot be encoded, see
+-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_invalid.txt
+-- Each value is checked twice: idnaEncode() must throw BAD_ARGUMENTS, idnaEncodeOrNull() must return NULL instead. idnaDecode() is not tested here because, by definition, it has no invalid input values.
+SELECT idnaEncode('xn--'); -- { serverError BAD_ARGUMENTS }
+SELECT idnaEncodeOrNull('xn--');
+SELECT idnaEncode('ﻱa'); -- { serverError BAD_ARGUMENTS }
+SELECT idnaEncodeOrNull('ﻱa');
+SELECT idnaEncode('xn--a-yoc'); -- { serverError BAD_ARGUMENTS }
+SELECT idnaEncodeOrNull('xn--a-yoc');
+SELECT idnaEncode('xn--tešla'); -- { serverError BAD_ARGUMENTS }
+SELECT idnaEncodeOrNull('xn--tešla');
+
+-- long input (the query below is commented out, i.e. it is not executed)
+-- SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull FORMAT Vertical;
+
+-- Non-const values: the input is read from a table column instead of a string literal. All three rows (including the empty string) are valid and round-trip through encode/decode.
+DROP TABLE IF EXISTS tab;
+CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna;
+INSERT INTO tab VALUES ('straße.münchen.de') ('') ('münchen');
+SELECT idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull FROM tab;
+DROP TABLE tab;
+
+-- Non-const values mixed with a few invalid ones ('xn--', 'xn--tešla') to test the OrNull variant: idnaEncode() must fail the whole query, idnaEncodeOrNull() must return NULL only for the invalid rows. NOTE(review): the column is named "ascii" but holds the unencoded input.
+DROP TABLE IF EXISTS tab;
+CREATE TABLE tab (ascii String) ENGINE=MergeTree ORDER BY ascii;
+INSERT INTO tab VALUES ('xn--') ('london.co.uk') ('straße.münchen.de') ('xn--tešla') ('microsoft.com') ('xn--');
+SELECT ascii, idnaEncode(ascii) AS original FROM tab; -- { serverError BAD_ARGUMENTS }
+SELECT ascii, idnaEncodeOrNull(ascii) AS original FROM tab;
+DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference
index a722894bce8..76508525b19 100644
--- a/tests/queries/0_stateless/02932_punycode.reference
+++ b/tests/queries/0_stateless/02932_punycode.reference
@@ -34,7 +34,13 @@ MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前	MajiK
 \N
 \N
 \N
-Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.	Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. 
Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa	Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.	Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... 
vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa	Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+Row 1:
+──────
+str:            Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+puny:           Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa
+original:       Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+punyOrNull:     Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa
+originalOrNull: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
 München	Mnchen-3ya	München	Mnchen-3ya	München
 abc	abc-	abc	abc-	abc
 aäoöuü	aou-qla5gqb	aäoöuü	aou-qla5gqb	aäoöuü
diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql
index a142848d427..8df47cbf3da 100644
--- a/tests/queries/0_stateless/02932_punycode.sql
+++ b/tests/queries/0_stateless/02932_punycode.sql
@@ -1,82 +1,84 @@
 -- Tags: no-fasttest
 -- no-fasttest: requires idna library
 
+-- See also 02932_idna.sql
+
 SELECT '-- Negative tests';
 
-SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT punycodeDecodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeEncodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 
-SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT punycodeDecodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeEncodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeDecodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
-SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT punycodeDecodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeEncodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT punycodeDecodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 
-SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT punycodeDecodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeEncodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeDecodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
 SELECT '-- Regular cases';
 
 -- The test cases originate from the ada idna unit tests:
---- https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
+-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
 
-SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
-SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
+SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
 
 SELECT '-- Special cases';
 
-SELECT punycodeDecode('');
-SELECT punycodeDecodeOrNull('');
 SELECT punycodeEncode('');
 SELECT punycodeEncodeOrNull('');
+SELECT punycodeDecode('');
+SELECT punycodeDecodeOrNull('');
 
-SELECT punycodeDecode(NULL);
-SELECT punycodeDecodeOrNull(NULL);
 SELECT punycodeEncode(NULL);
 SELECT punycodeEncodeOrNull(NULL);
+SELECT punycodeDecode(NULL);
+SELECT punycodeDecodeOrNull(NULL);
 
 -- garbage Punycode-encoded values
 SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS }
 SELECT punycodeDecodeOrNull('no punycode');
 
 -- long input
-SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) as punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull;
+SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) as punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull FORMAT Vertical;
 
 -- non-const values
 DROP TABLE IF EXISTS tab;
 CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str;
 INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München');
-SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) as punyOrNull, punycodeDecodeOrNull(punyOrNull) as originalOrNull FROM tab;
+SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull FROM tab;
 DROP TABLE tab;
 
 -- non-const values with a few invalid values for testing the OrNull variants

From 950140cbf5b4649dfcf413ded2be2a65faa6cf7e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 18:43:52 +0000
Subject: [PATCH 135/204] Fixing build.

---
 src/Processors/QueryPlan/ReadFromMergeTree.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index de87f041bf0..b409e857d9a 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -37,8 +37,8 @@
 #include <Common/JSONBuilder.h>
 #include <Common/isLocalAddress.h>
 #include <Common/logger_useful.h>
-#include "Storages/MergeTree/MergeTreeIndexAnnoy.h"
-#include "Storages/MergeTree/MergeTreeIndexUSearch.h"
+#include <Storages/MergeTree/MergeTreeIndexAnnoy.h>
+#include <Storages/MergeTree/MergeTreeIndexUSearch.h>
 #include <Parsers/parseIdentifierOrStringLiteral.h>
 #include <Parsers/ExpressionListParsers.h>
 
@@ -1421,11 +1421,15 @@ static void buildIndexes(
                 MergeTreeIndexConditionPtr condition;
                 if (index_helper->isVectorSearch())
                 {
+#ifdef ENABLE_ANNOY
                     if (const auto * annoy = typeid_cast<const MergeTreeIndexAnnoy *>(index_helper.get()))
                         condition = annoy->createIndexCondition(*info, context);
-                    else if (const auto * usearch = typeid_cast<const MergeTreeIndexUSearch *>(index_helper.get()))
+#endif
+#ifdef ENABLE_USEARCH
+                    if (const auto * usearch = typeid_cast<const MergeTreeIndexUSearch *>(index_helper.get()))
                         condition = usearch->createIndexCondition(*info, context);
-                    else
+#endif
+                    if (!condition)
                         throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name);
                 }
                 else

From a813431fcb58a50650e7e9eef29d41dd33a6397f Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 3 Jan 2024 20:09:12 +0000
Subject: [PATCH 136/204] Fix spelling

---
 .../check-style/aspell-ignore/en/aspell-dict.txt | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index f339f451878..11fab98cf2d 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -344,6 +344,7 @@ Hypot
 IANA
 IDE
 IDEs
+IDNA
 IMDS
 INFILE
 INSERTed
@@ -701,8 +702,6 @@ PrettySpaceMonoBlock
 PrettySpaceNoEscapes
 PrettySpaceNoEscapesMonoBlock
 Prewhere
-TotalPrimaryKeyBytesInMemory
-TotalPrimaryKeyBytesInMemoryAllocated
 PrivateKeyPassphraseHandler
 ProfileEvents
 Profiler
@@ -913,6 +912,7 @@ ThreadsInOvercommitTracker
 Timeunit
 TinyLog
 Tkachenko
+ToASCII
 ToCenterChild
 ToChildren
 ToGeo
@@ -921,10 +921,13 @@ ToIPv
 ToParent
 ToSnowflake
 ToString
+ToUnicode
 Toolset
 TopK
 TotalBytesOfMergeTreeTables
 TotalPartsOfMergeTreeTables
+TotalPrimaryKeyBytesInMemory
+TotalPrimaryKeyBytesInMemoryAllocated
 TotalRowsOfMergeTreeTables
 TotalTemporaryFiles
 Tradeoff
@@ -1652,6 +1655,9 @@ hyvor
 icosahedron
 icudata
 idempotency
+idnaDecode
+idnaEncode
+idnaEncodeOrNull
 ifNotFinite
 ifNull
 iframe
@@ -1849,14 +1855,14 @@ metrica
 metroHash
 mfedotov
 minMap
+minSampleSizeContinuous
+minSampleSizeConversion
 mindsdb
 minimalistic
 mininum
 miniselect
 minmap
 minmax
-minSampleSizeContinuous
-minSampleSizeConversion
 mins
 misconfiguration
 mispredictions
@@ -2077,7 +2083,9 @@ pseudorandomize
 psql
 ptrs
 punycodeDecode
+punycodeDecodeOrNull
 punycodeEncode
+punycodeEncodeOrNull
 pushdown
 pwrite
 py

From 66d2db52832a81aea43cda66a500d8b3369547ef Mon Sep 17 00:00:00 2001
From: Nikita Taranov <nikita.taranov@clickhouse.com>
Date: Thu, 4 Jan 2024 00:27:04 +0100
Subject: [PATCH 137/204] New parallel replicas coordinator implementation
 (#57968)

---
 src/Common/ProfileEvents.cpp                  |  12 +
 src/Core/Settings.h                           |   1 +
 .../ClusterProxy/executeQuery.cpp             |   3 +-
 .../MergeTreeReadPoolParallelReplicas.cpp     |  11 +-
 .../MergeTreeReadPoolParallelReplicas.h       |   1 +
 .../ParallelReplicasReadingCoordinator.cpp    | 810 +++++++++++++-----
 .../ParallelReplicasReadingCoordinator.h      |   4 +-
 .../__init__.py                               |   0
 .../configs/remote_servers.xml                |  32 +
 .../test.py                                   | 156 ++++
 .../configs/remote_servers.xml                |  22 -
 .../test.py                                   | 156 ----
 .../__init__.py                               |   0
 .../configs/remote_servers.xml                |  22 -
 .../test.py                                   | 140 ---
 15 files changed, 817 insertions(+), 553 deletions(-)
 rename tests/integration/{test_parallel_replicas_distributed_read_from_all => test_parallel_replicas_all_marks_read}/__init__.py (100%)
 create mode 100644 tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml
 create mode 100644 tests/integration/test_parallel_replicas_all_marks_read/test.py
 delete mode 100644 tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml
 delete mode 100644 tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
 delete mode 100644 tests/integration/test_parallel_replicas_working_set/__init__.py
 delete mode 100644 tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
 delete mode 100644 tests/integration/test_parallel_replicas_working_set/test.py

diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 4bdf6288a1c..119e0d99143 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -288,6 +288,18 @@ The server successfully detected this situation and will download merged part fr
     M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \
     M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \
     \
+    M(ParallelReplicasHandleRequestMicroseconds, "Time spent processing requests for marks from replicas") \
+    M(ParallelReplicasHandleAnnouncementMicroseconds, "Time spent processing replicas announcements") \
+    \
+    M(ParallelReplicasReadAssignedMarks, "Sum across all replicas of how many of scheduled marks were assigned by consistent hash") \
+    M(ParallelReplicasReadUnassignedMarks, "Sum across all replicas of how many unassigned marks were scheduled") \
+    M(ParallelReplicasReadAssignedForStealingMarks, "Sum across all replicas of how many of scheduled marks were assigned for stealing by consistent hash") \
+    \
+    M(ParallelReplicasStealingByHashMicroseconds, "Time spent collecting segments meant for stealing by hash") \
+    M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \
+    M(ParallelReplicasStealingLeftoversMicroseconds, "Time spent collecting orphaned segments") \
+    M(ParallelReplicasCollectingOwnedSegmentsMicroseconds, "Time spent collecting segments meant by hash") \
+    \
     M(PerfCpuCycles, "Total cycles. Be wary of what happens during CPU frequency scaling.")  \
     M(PerfInstructions, "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.") \
     M(PerfCacheReferences, "Cache accesses. Usually, this indicates Last Level Cache accesses, but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.") \
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9516ef72077..4e057861f60 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -185,6 +185,7 @@ class IColumn;
     M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
     M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
     M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
+    M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
     \
     M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \
     \
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 18f7280dd19..c448206ed78 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -412,7 +412,8 @@ void executeQueryWithParallelReplicas(
         new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas);
     }
 
-    auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(new_cluster->getShardCount());
+    auto coordinator
+        = std::make_shared<ParallelReplicasReadingCoordinator>(new_cluster->getShardCount(), settings.parallel_replicas_mark_segment_size);
     auto external_tables = new_context->getExternalTables();
     auto read_from_remote = std::make_unique<ReadFromParallelRemoteReplicasStep>(
         query_ast,
diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp
index e61ddf0d122..69e64d5ea98 100644
--- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp
+++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp
@@ -1,5 +1,6 @@
 #include <Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h>
 
+
 namespace DB
 {
 
@@ -30,12 +31,10 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas(
         settings_,
         context_)
     , extension(std::move(extension_))
+    , coordination_mode(CoordinationMode::Default)
 {
-    extension.all_callback(InitialAllRangesAnnouncement(
-        CoordinationMode::Default,
-        parts_ranges.getDescriptions(),
-        extension.number_of_current_replica
-    ));
+    extension.all_callback(
+        InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica));
 }
 
 MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_idx*/, MergeTreeReadTask * previous_task)
@@ -48,7 +47,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_id
     if (buffered_ranges.empty())
     {
         auto result = extension.callback(ParallelReadRequest(
-            CoordinationMode::Default,
+            coordination_mode,
             extension.number_of_current_replica,
             pool_settings.min_marks_for_concurrent_read * pool_settings.threads,
             /// For Default coordination mode we don't need to pass part names.
diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h
index 08020565ec4..7579a892b67 100644
--- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h
+++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h
@@ -31,6 +31,7 @@ private:
     mutable std::mutex mutex;
 
     const ParallelReadingExtension extension;
+    const CoordinationMode coordination_mode;
     RangesInDataPartsDescription buffered_ranges;
     bool no_more_tasks_available{false};
     Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPoolParallelReplicas");
diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
index 333a0590d6b..bbe8c30a5c0 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
@@ -1,27 +1,77 @@
 #include <Storages/MergeTree/ParallelReplicasReadingCoordinator.h>
 
 #include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <iterator>
+#include <map>
 #include <mutex>
 #include <numeric>
-#include <vector>
-#include <map>
 #include <set>
-
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
 #include <consistent_hashing.h>
 
-#include "Common/Exception.h"
-#include <Common/logger_useful.h>
-#include <Common/SipHash.h>
-#include <Common/thread_local_rng.h>
-#include <base/types.h>
-#include "IO/WriteBufferFromString.h"
 #include <IO/Progress.h>
-#include "Storages/MergeTree/RangesInDataPart.h"
-#include "Storages/MergeTree/RequestResponse.h"
-#include <Storages/MergeTree/MarkRange.h>
+#include <IO/WriteBufferFromString.h>
 #include <Storages/MergeTree/IntersectionsIndexes.h>
+#include <Storages/MergeTree/MarkRange.h>
+#include <Storages/MergeTree/MergeTreePartInfo.h>
+#include <Storages/MergeTree/RangesInDataPart.h>
+#include <Storages/MergeTree/RequestResponse.h>
+#include <base/defines.h>
+#include <base/types.h>
+#include <boost/algorithm/string/split.hpp>
 #include <fmt/core.h>
 #include <fmt/format.h>
+#include <Common/ElapsedTimeProfileEventIncrement.h>
+#include <Common/Exception.h>
+#include <Common/ProfileEvents.h>
+#include <Common/SipHash.h>
+#include <Common/logger_useful.h>
+#include <Common/thread_local_rng.h>
+
+using namespace DB;
+
+namespace
+{
+size_t roundDownToMultiple(size_t num, size_t multiple)
+{
+    return (num / multiple) * multiple;
+}
+
+size_t
+takeFromRange(const MarkRange & range, size_t min_number_of_marks, size_t & current_marks_amount, RangesInDataPartDescription & result)
+{
+    const auto marks_needed = min_number_of_marks - current_marks_amount;
+    chassert(marks_needed);
+    auto range_we_take = MarkRange{range.begin, range.begin + std::min(marks_needed, range.getNumberOfMarks())};
+    if (!result.ranges.empty() && result.ranges.back().end == range_we_take.begin)
+        /// Can extend the previous range
+        result.ranges.back().end = range_we_take.end;
+    else
+        result.ranges.emplace_back(range_we_take);
+    current_marks_amount += range_we_take.getNumberOfMarks();
+    return range_we_take.getNumberOfMarks();
+}
+}
+
+namespace ProfileEvents
+{
+extern const Event ParallelReplicasHandleRequestMicroseconds;
+extern const Event ParallelReplicasHandleAnnouncementMicroseconds;
+
+extern const Event ParallelReplicasStealingByHashMicroseconds;
+extern const Event ParallelReplicasProcessingPartsMicroseconds;
+extern const Event ParallelReplicasStealingLeftoversMicroseconds;
+extern const Event ParallelReplicasCollectingOwnedSegmentsMicroseconds;
+
+extern const Event ParallelReplicasReadAssignedMarks;
+extern const Event ParallelReplicasReadUnassignedMarks;
+extern const Event ParallelReplicasReadAssignedForStealingMarks;
+}
 
 namespace ProfileEvents
 {
@@ -58,7 +108,8 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
+extern const int BAD_ARGUMENTS;
+extern const int LOGICAL_ERROR;
 }
 
 class ParallelReplicasReadingCoordinator::ImplInterface
@@ -68,6 +119,15 @@ public:
     {
         size_t number_of_requests{0};
         size_t sum_marks{0};
+
+        /// Marks assigned to the given replica by consistent hash
+        size_t assigned_to_me = 0;
+        /// Marks stolen from other replicas
+        size_t stolen_unassigned = 0;
+
+        /// Stolen marks that were assigned for stealing to the given replica by hash. Makes sense only for DefaultCoordinator
+        size_t stolen_by_hash = 0;
+
         bool is_unavailable{false};
     };
     using Stats = std::vector<Stat>;
@@ -76,7 +136,15 @@ public:
         String result = "Statistics: ";
         std::vector<String> stats_by_replica;
         for (size_t i = 0; i < stats.size(); ++i)
-            stats_by_replica.push_back(fmt::format("replica {}{} - {{requests: {} marks: {}}}", i, stats[i].is_unavailable ? " is unavailable" : "", stats[i].number_of_requests, stats[i].sum_marks));
+            stats_by_replica.push_back(fmt::format(
+                "replica {}{} - {{requests: {} marks: {} assigned_to_me: {} stolen_by_hash: {} stolen_unassigned: {}}}",
+                i,
+                stats[i].is_unavailable ? " is unavailable" : "",
+                stats[i].number_of_requests,
+                stats[i].sum_marks,
+                stats[i].assigned_to_me,
+                stats[i].stolen_by_hash,
+                stats[i].stolen_unassigned));
         result += fmt::format("{}", fmt::join(stats_by_replica, "; "));
         return result;
     }
@@ -92,6 +160,7 @@ public:
     {}
 
     virtual ~ImplInterface() = default;
+
     virtual ParallelReadResponse handleRequest(ParallelReadRequest request) = 0;
     virtual void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) = 0;
     virtual void markReplicaAsUnavailable(size_t replica_number) = 0;
@@ -103,165 +172,227 @@ using Parts = std::set<Part>;
 using PartRefs = std::deque<Parts::iterator>;
 
 
+/// This coordinator relies heavily on the fact that we work with a single shard,
+/// i.e. the difference in parts contained in each replica's snapshot is rather negligible (it is only recently inserted or merged parts).
+/// So the guarantees we provide here are basically the same as with single-node reading: we will read from parts as they were seen by some node at the moment when the query started.
+///
+/// Knowing that almost each part could be read by each node, we suppose ranges of each part to be available to all the replicas and thus distribute them evenly between them
+/// (of course we still check if replica has access to the given part before scheduling a reading from it).
+///
+/// Of course we want to distribute marks evenly. Looks like it is better to split parts into reasonably small segments of equal size
+/// (something between 16 and 128 granules i.e. ~100K and ~1M rows should work).
+/// This approach seems to work ok for all three main cases: full scan, reading random sub-ranges and reading only {pre,suf}-fix of parts.
+/// Also we could expect that more granular division will make distribution more even up to a certain point.
 class DefaultCoordinator : public ParallelReplicasReadingCoordinator::ImplInterface
 {
 public:
-    using ParallelReadRequestPtr = std::unique_ptr<ParallelReadRequest>;
-    using PartToMarkRanges = std::map<PartToRead::PartAndProjectionNames, HalfIntervals>;
-
-    explicit DefaultCoordinator(size_t replicas_count_)
+    explicit DefaultCoordinator(size_t replicas_count_, size_t mark_segment_size_)
         : ParallelReplicasReadingCoordinator::ImplInterface(replicas_count_)
-        , reading_state(replicas_count_)
+        , mark_segment_size(mark_segment_size_)
+        , replica_status(replicas_count_)
+        , distribution_by_hash_queue(replicas_count_)
     {
+        if (mark_segment_size == 0)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Zero value provided for `mark_segment_size`");
     }
 
     ~DefaultCoordinator() override;
 
-    struct PartitionReading
-    {
-        PartSegments part_ranges;
-        PartToMarkRanges mark_ranges_in_part;
-    };
+    ParallelReadResponse handleRequest(ParallelReadRequest request) override;
 
-    using PartitionToBlockRanges = std::map<String, PartitionReading>;
-    PartitionToBlockRanges partitions;
+    void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override;
+
+    void markReplicaAsUnavailable(size_t replica_number) override;
+
+private:
+    /// This many granules will represent a single segment of marks that will be assigned to a replica
+    const size_t mark_segment_size{0};
 
     size_t sent_initial_requests{0};
+    bool state_initialized{false};
+    size_t finished_replicas{0};
 
-    Parts all_parts_to_read;
-    /// Contains only parts which we haven't started to read from
-    PartRefs delayed_parts;
-    /// Per-replica preferred parts split by consistent hash
-    /// Once all task will be done by some replica, it can steal tasks
-    std::vector<PartRefs> reading_state;
+    struct ReplicaStatus
+    {
+        bool is_finished{false};
+        bool is_announcement_received{false};
+    };
+    std::vector<ReplicaStatus> replica_status;
 
     Poco::Logger * log = &Poco::Logger::get("DefaultCoordinator");
 
-    std::atomic<bool> state_initialized{false};
+    /// Workflow of a segment:
+    /// 0. `all_parts_to_read` contains all the parts and thus all the segments initially present there (virtually)
+    /// 1. when we traverse `all_parts_to_read` in selectPartsAndRanges() we either:
+    ///     * take this segment into output
+    ///     * put this segment into `distribution_by_hash_queue` for its owner if it's available and can read from it
+    ///     * otherwise put this segment into `distribution_by_hash_queue` for its stealer_by_hash if it's available and can read from it
+    ///     * otherwise put this segment into `ranges_for_stealing_queue`
+    /// 2. when we traverse `distribution_by_hash_queue` in `selectPartsAndRanges` we either:
+    ///     * take this segment into output
+    ///     * otherwise put this segment into `distribution_by_hash_queue` for its stealer_by_hash if it's available and can read from it
+    ///     * otherwise put this segment into `ranges_for_stealing_queue`
+    /// 3. when we figure out that some replica is unavailable we move all segments from its `distribution_by_hash_queue` to their stealers by hash or to `ranges_for_stealing_queue`
+    /// 4. when we get the announcement from a replica we move all segments it cannot read to their stealers by hash or to `ranges_for_stealing_queue`
+    ///
+    /// So, segments always move in one direction down this path (possibly skipping some stops):
+    /// `all_parts_to_read` -> `distribution_by_hash_queue[owner]` -> `distribution_by_hash_queue[stealer_by_hash]` -> `ranges_for_stealing_queue`
 
-    ParallelReadResponse handleRequest(ParallelReadRequest request) override;
-    void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override;
-    void markReplicaAsUnavailable(size_t replica_number) override;
+    /// We take the set of parts announced by this replica as the working set for the whole query.
+    /// For this replica we know for sure that
+    ///     1. it sees all the parts from this set
+    ///     2. it was available at the beginning of execution (since we got its announcement), so if it becomes unavailable at some point - the query will fail with an exception.
+    ///        this means that we can delegate reading of all leftover segments (i.e. segments that were not read by their owner or stealer by hash) to this node
+    size_t source_replica_for_parts_snapshot{0};
 
-    void updateReadingState(InitialAllRangesAnnouncement announcement);
-    void finalizeReadingState();
+    /// Parts view from the first announcement we received
+    std::vector<Part> all_parts_to_read;
 
-    size_t computeConsistentHash(const MergeTreePartInfo & info) const
+    std::unordered_map<std::string, std::unordered_set<size_t>> part_visibility; /// part_name -> set of replicas announced that part
+
+    /// We order parts from biggest (= oldest) to newest and steal from newest. Because we assume
+    /// that they're gonna be merged soon anyway and for them we should already expect worse cache hit.
+    struct BiggerPartsFirst
     {
-        auto hash = SipHash();
-        hash.update(info.getPartNameV1());
-        return ConsistentHashing(hash.get64(), replicas_count);
-    }
+        bool operator()(const auto & lhs, const auto & rhs) const { return lhs.info.getBlocksCount() > rhs.info.getBlocksCount(); }
+    };
 
-    void selectPartsAndRanges(const PartRefs & container, size_t replica_num, size_t min_number_of_marks, size_t & current_mark_size, ParallelReadResponse & response) const;
+    /// We don't precalculate the whole assignment for each node at the start.
+    /// When replica asks coordinator for a new portion of data to read, it traverses `all_parts_to_read` to find ranges relevant to this replica (by consistent hash).
+    /// Many hashes are calculated during this process, and in order not to waste that work we remember, for all the ranges
+    /// observed along the way, which node they belong to.
+    /// Ranges in this queue might belong to a part that the given replica cannot read from - the corresponding check happens later.
+    /// TODO: consider making it bounded in size
+    std::vector<std::multiset<RangesInDataPartDescription, BiggerPartsFirst>> distribution_by_hash_queue;
+
+    /// For some ranges neither their owner nor their stealer (by consistent hash) can read from the given part at all. So such a range has to be stolen anyway.
+    /// TODO: consider making it bounded in size
+    RangesInDataPartsDescription ranges_for_stealing_queue;
+
+    /// We take only first replica's set of parts as the whole working set for this query.
+    /// For other replicas we'll just discard parts that they know, but that weren't present in the first request we received.
+    /// The second and all subsequent announcements are needed only to understand whether we can schedule reading from the given part on the given replica.
+    void initializeReadingState(InitialAllRangesAnnouncement announcement);
+
+    void setProgressCallback();
+
+    enum class ScanMode
+    {
+        /// Main working set for the replica
+        TakeWhatsMineByHash,
+        /// We need to steal to optimize tail latency, let's do it by hash nevertheless
+        TakeWhatsMineForStealing,
+        /// All bets are off, we need to steal "for correctness" - to not leave any segments unread
+        TakeEverythingAvailable
+    };
+
+    void selectPartsAndRanges(
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    size_t computeConsistentHash(const std::string & part_name, size_t segment_begin, ScanMode scan_mode) const;
+
+    void tryToTakeFromDistributionQueue(
+        size_t replica_num, size_t min_number_of_marks, size_t & current_marks_amount, RangesInDataPartsDescription & description);
+
+    void tryToStealFromQueues(
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    void tryToStealFromQueue(
+        auto & queue,
+        ssize_t owner, /// In case `queue` is `distribution_by_hash_queue[replica]`
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    void processPartsFurther(
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    bool possiblyCanReadPart(size_t replica, const MergeTreePartInfo & info) const;
+    void enqueueSegment(const MergeTreePartInfo & info, const MarkRange & segment, size_t owner);
+    void enqueueToStealerOrStealingQueue(const MergeTreePartInfo & info, const MarkRange & segment);
 };
 
+
 DefaultCoordinator::~DefaultCoordinator()
 {
-    LOG_DEBUG(log, "Coordination done: {}", toString(stats));
+    try
+    {
+        LOG_DEBUG(log, "Coordination done: {}", toString(stats));
+    }
+    catch (...)
+    {
+        tryLogCurrentException(log);
+    }
 }
 
-void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announcement)
+void DefaultCoordinator::initializeReadingState(InitialAllRangesAnnouncement announcement)
 {
-    PartRefs parts_diff;
-
-    /// To get rid of duplicates
-    for (auto && part_ranges: announcement.description)
+    for (const auto & part : announcement.description)
     {
-        Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}};
-        const MergeTreePartInfo & announced_part = part.description.info;
-
-        auto it = std::lower_bound(cbegin(all_parts_to_read), cend(all_parts_to_read), part);
-        if (it != all_parts_to_read.cend())
-        {
-            const MergeTreePartInfo & found_part = it->description.info;
-            if (found_part == announced_part)
-            {
-                /// We have the same part - add the info about presence on current replica
-                it->replicas.insert(announcement.replica_num);
-                continue;
-            }
-            else
-            {
-                /// check if it is covering or covered part
-                /// need to compare with 2 nearest parts in set, - lesser and greater than the part from the announcement
-                bool is_disjoint = found_part.isDisjoint(announced_part);
-                if (it != all_parts_to_read.cbegin() && is_disjoint)
-                {
-                    const MergeTreePartInfo & lesser_part = (--it)->description.info;
-                    is_disjoint &= lesser_part.isDisjoint(announced_part);
-                }
-                if (!is_disjoint)
-                    continue;
-            }
-        }
-        else if (!all_parts_to_read.empty())
-        {
-            /// the announced part is greatest - check if it's disjoint with lesser part
-            const MergeTreePartInfo & lesser_part = all_parts_to_read.crbegin()->description.info;
-            if (!lesser_part.isDisjoint(announced_part))
-                continue;
-        }
-
-        auto [insert_it, _] = all_parts_to_read.emplace(std::move(part));
-        parts_diff.push_back(insert_it);
+        /// We don't really care here if this part will be included into the working set or not
+        part_visibility[part.info.getPartNameV1()].insert(announcement.replica_num);
     }
 
-    /// Split all parts by consistent hash
-    while (!parts_diff.empty())
+    /// If state is already initialized - just register availability info and leave
+    if (state_initialized)
+        return;
+
+    for (auto && part : announcement.description)
     {
-        auto current_part_it = parts_diff.front();
-        parts_diff.pop_front();
-        auto consistent_hash = computeConsistentHash(current_part_it->description.info);
+        auto intersecting_it = std::find_if(
+            all_parts_to_read.begin(),
+            all_parts_to_read.end(),
+            [&part](const Part & other) { return !other.description.info.isDisjoint(part.info); });
 
-        /// Check whether the new part can easy go to replica queue
-        if (current_part_it->replicas.contains(consistent_hash))
-        {
-            reading_state[consistent_hash].emplace_back(current_part_it);
-            continue;
-        }
+        if (intersecting_it != all_parts_to_read.end())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Intersecting parts found in announcement");
 
-        /// Add to delayed parts
-        delayed_parts.emplace_back(current_part_it);
+        all_parts_to_read.push_back(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
     }
+
+    std::ranges::sort(
+        all_parts_to_read, [](const Part & lhs, const Part & rhs) { return BiggerPartsFirst()(lhs.description, rhs.description); });
+    state_initialized = true;
+    source_replica_for_parts_snapshot = announcement.replica_num;
+
+    LOG_DEBUG(log, "Reading state is fully initialized: {}", fmt::join(all_parts_to_read, "; "));
 }
 
 void DefaultCoordinator::markReplicaAsUnavailable(size_t replica_number)
 {
-    if (stats[replica_number].is_unavailable == false)
+    LOG_DEBUG(log, "Replica number {} is unavailable", replica_number);
+
+    ++unavailable_replicas_count;
+    stats[replica_number].is_unavailable = true;
+
+    if (sent_initial_requests == replicas_count - unavailable_replicas_count)
+        setProgressCallback();
+
+    for (const auto & segment : distribution_by_hash_queue[replica_number])
     {
-        LOG_DEBUG(log, "Replica number {} is unavailable", replica_number);
-
-        stats[replica_number].is_unavailable = true;
-        ++unavailable_replicas_count;
-
-        if (sent_initial_requests == replicas_count - unavailable_replicas_count)
-            finalizeReadingState();
+        chassert(segment.ranges.size() == 1);
+        enqueueToStealerOrStealingQueue(segment.info, segment.ranges.front());
     }
+    distribution_by_hash_queue[replica_number].clear();
 }
 
-void DefaultCoordinator::finalizeReadingState()
+void DefaultCoordinator::setProgressCallback()
 {
-    /// Clear all the delayed queue
-    while (!delayed_parts.empty())
-    {
-        auto current_part_it = delayed_parts.front();
-        auto consistent_hash = computeConsistentHash(current_part_it->description.info);
-
-        if (current_part_it->replicas.contains(consistent_hash))
-        {
-            reading_state[consistent_hash].emplace_back(current_part_it);
-            delayed_parts.pop_front();
-            continue;
-        }
-
-        /// In this situation just assign to a random replica which has this part
-        auto replica = *(std::next(current_part_it->replicas.begin(), thread_local_rng() % current_part_it->replicas.size()));
-        reading_state[replica].emplace_back(current_part_it);
-        delayed_parts.pop_front();
-    }
-
-    // update progress with total rows
+    // Update progress with total rows
     if (progress_callback)
     {
         size_t total_rows_to_read = 0;
@@ -274,116 +405,378 @@ void DefaultCoordinator::finalizeReadingState()
 
         LOG_DEBUG(log, "Total rows to read: {}", total_rows_to_read);
     }
-
-    LOG_DEBUG(log, "Reading state is fully initialized: {}", fmt::join(all_parts_to_read, "; "));
 }
 
-
 void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
 {
     const auto replica_num = announcement.replica_num;
 
-    updateReadingState(std::move(announcement));
+    LOG_DEBUG(log, "Initial request from replica {}: {}", announcement.replica_num, announcement.describe());
+
+    initializeReadingState(std::move(announcement));
 
     if (replica_num >= stats.size())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size());
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size());
 
     ++stats[replica_num].number_of_requests;
+    replica_status[replica_num].is_announcement_received = true;
 
     ++sent_initial_requests;
     LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count);
+
     if (sent_initial_requests == replicas_count)
-        finalizeReadingState();
-}
+        setProgressCallback();
 
-void DefaultCoordinator::selectPartsAndRanges(const PartRefs & container, size_t replica_num, size_t min_number_of_marks, size_t & current_mark_size, ParallelReadResponse & response) const
-{
-    for (const auto & part : container)
+    /// Sift the queue to move out all invisible segments
+    for (const auto & segment : distribution_by_hash_queue[replica_num])
     {
-        if (current_mark_size >= min_number_of_marks)
+        if (!part_visibility[segment.info.getPartNameV1()].contains(replica_num))
         {
-            LOG_TEST(log, "Current mark size {} is bigger than min_number_marks {}", current_mark_size, min_number_of_marks);
-            break;
-        }
-
-        if (part->description.ranges.empty())
-        {
-            LOG_TEST(log, "Part {} is already empty in reading state", part->description.info.getPartNameV1());
-            continue;
-        }
-
-        if (std::find(part->replicas.begin(), part->replicas.end(), replica_num) == part->replicas.end())
-        {
-            LOG_TEST(log, "Not found part {} on replica {}", part->description.info.getPartNameV1(), replica_num);
-            continue;
-        }
-
-        response.description.push_back({
-            .info = part->description.info,
-            .ranges = {},
-        });
-
-        while (!part->description.ranges.empty() && current_mark_size < min_number_of_marks)
-        {
-            auto & range = part->description.ranges.front();
-            const size_t needed = min_number_of_marks - current_mark_size;
-
-            if (range.getNumberOfMarks() > needed)
-            {
-                auto range_we_take = MarkRange{range.begin, range.begin + needed};
-                response.description.back().ranges.emplace_back(range_we_take);
-                current_mark_size += range_we_take.getNumberOfMarks();
-
-                range.begin += needed;
-                break;
-            }
-
-            response.description.back().ranges.emplace_back(range);
-            current_mark_size += range.getNumberOfMarks();
-            part->description.ranges.pop_front();
+            chassert(segment.ranges.size() == 1);
+            enqueueToStealerOrStealingQueue(segment.info, segment.ranges.front());
         }
     }
 }
 
+void DefaultCoordinator::tryToTakeFromDistributionQueue(
+    size_t replica_num, size_t min_number_of_marks, size_t & current_marks_amount, RangesInDataPartsDescription & description)
+{
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasCollectingOwnedSegmentsMicroseconds);
+
+    auto & distribution_queue = distribution_by_hash_queue[replica_num];
+    auto replica_can_read_part = [&](auto replica, const auto & part) { return part_visibility[part.getPartNameV1()].contains(replica); };
+
+    RangesInDataPartDescription result;
+
+    while (!distribution_queue.empty() && current_marks_amount < min_number_of_marks)
+    {
+        if (result.ranges.empty() || distribution_queue.begin()->info != result.info)
+        {
+            if (!result.ranges.empty())
+                /// We're switching to a different part, so have to save currently accumulated ranges
+                description.push_back(result);
+            result = {.info = distribution_queue.begin()->info};
+        }
+
+        /// NOTE: this works because ranges are not considered by the comparator
+        auto & part_ranges = const_cast<RangesInDataPartDescription &>(*distribution_queue.begin());
+        chassert(part_ranges.ranges.size() == 1);
+        auto & range = part_ranges.ranges.front();
+
+        if (replica_can_read_part(replica_num, part_ranges.info))
+        {
+            if (auto taken = takeFromRange(range, min_number_of_marks, current_marks_amount, result); taken == range.getNumberOfMarks())
+                distribution_queue.erase(distribution_queue.begin());
+            else
+            {
+                range.begin += taken;
+                break;
+            }
+        }
+        else
+        {
+            /// It might be that `replica_num` is the stealer by hash itself - no problem,
+            /// we'll just have a redundant hash computation inside this function
+            enqueueToStealerOrStealingQueue(part_ranges.info, range);
+            distribution_queue.erase(distribution_queue.begin());
+        }
+    }
+
+    if (!result.ranges.empty())
+        description.push_back(result);
+}
+
+void DefaultCoordinator::tryToStealFromQueues(
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    auto steal_from_other_replicas = [&]()
+    {
+        /// Try to steal from other replicas starting from replicas with longest queues
+        std::vector<size_t> order(replicas_count);
+        std::iota(order.begin(), order.end(), 0);
+        std::ranges::sort(
+            order, [&](auto lhs, auto rhs) { return distribution_by_hash_queue[lhs].size() > distribution_by_hash_queue[rhs].size(); });
+
+        for (auto replica : order)
+            tryToStealFromQueue(
+                distribution_by_hash_queue[replica],
+                replica,
+                replica_num,
+                scan_mode,
+                min_number_of_marks,
+                current_marks_amount,
+                description);
+    };
+
+    if (scan_mode == ScanMode::TakeWhatsMineForStealing)
+    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasStealingByHashMicroseconds);
+        steal_from_other_replicas();
+    }
+    else
+    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasStealingLeftoversMicroseconds);
+        /// Check orphaned ranges
+        tryToStealFromQueue(
+            ranges_for_stealing_queue, /*owner=*/-1, replica_num, scan_mode, min_number_of_marks, current_marks_amount, description);
+        /// Last hope. In case we haven't yet figured out that some node is unavailable its segments are still in the distribution queue.
+        steal_from_other_replicas();
+    }
+}
+
+void DefaultCoordinator::tryToStealFromQueue(
+    auto & queue,
+    ssize_t owner,
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    auto replica_can_read_part = [&](auto replica, const auto & part) { return part_visibility[part.getPartNameV1()].contains(replica); };
+
+    RangesInDataPartDescription result;
+
+    auto it = queue.rbegin();
+    while (it != queue.rend() && current_marks_amount < min_number_of_marks)
+    {
+        auto & part_ranges = const_cast<RangesInDataPartDescription &>(*it);
+        chassert(part_ranges.ranges.size() == 1);
+        auto & range = part_ranges.ranges.front();
+
+        if (result.ranges.empty() || part_ranges.info != result.info)
+        {
+            if (!result.ranges.empty())
+                /// We're switching to a different part, so have to save currently accumulated ranges
+                description.push_back(result);
+            result = {.info = part_ranges.info};
+        }
+
+        if (replica_can_read_part(replica_num, part_ranges.info))
+        {
+            bool can_take = false;
+            if (scan_mode == ScanMode::TakeWhatsMineForStealing)
+            {
+                chassert(owner >= 0);
+                const size_t segment_begin = roundDownToMultiple(range.begin, mark_segment_size);
+                can_take = computeConsistentHash(part_ranges.info.getPartNameV1(), segment_begin, scan_mode) == replica_num;
+            }
+            else
+            {
+                /// Don't steal segments with alive owner that sees them
+                can_take = owner == -1 || stats[owner].is_unavailable || !replica_status[owner].is_announcement_received;
+            }
+            if (can_take)
+            {
+                if (auto taken = takeFromRange(range, min_number_of_marks, current_marks_amount, result); taken == range.getNumberOfMarks())
+                {
+                    it = decltype(it)(queue.erase(std::next(it).base()));
+                    continue;
+                }
+                else
+                    range.begin += taken;
+            }
+        }
+
+        ++it;
+    }
+
+    if (!result.ranges.empty())
+        description.push_back(result);
+}
+
+void DefaultCoordinator::processPartsFurther(
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasProcessingPartsMicroseconds);
+
+    for (const auto & part : all_parts_to_read)
+    {
+        if (current_marks_amount >= min_number_of_marks)
+        {
+            LOG_TEST(log, "Current mark size {} is bigger than min_number_marks {}", current_marks_amount, min_number_of_marks);
+            return;
+        }
+
+        RangesInDataPartDescription result{.info = part.description.info};
+
+        while (!part.description.ranges.empty() && current_marks_amount < min_number_of_marks)
+        {
+            auto & range = part.description.ranges.front();
+
+            /// Parts are divided into segments of `mark_segment_size` granules starting from the 0-th granule
+            for (size_t segment_begin = roundDownToMultiple(range.begin, mark_segment_size);
+                 segment_begin < range.end && current_marks_amount < min_number_of_marks;
+                 segment_begin += mark_segment_size)
+            {
+                const auto cur_segment
+                    = MarkRange{std::max(range.begin, segment_begin), std::min(range.end, segment_begin + mark_segment_size)};
+
+                const auto owner = computeConsistentHash(part.description.info.getPartNameV1(), segment_begin, scan_mode);
+                if (owner == replica_num)
+                {
+                    const auto taken = takeFromRange(cur_segment, min_number_of_marks, current_marks_amount, result);
+                    if (taken == range.getNumberOfMarks())
+                        part.description.ranges.pop_front();
+                    else
+                    {
+                        range.begin += taken;
+                        break;
+                    }
+                }
+                else
+                {
+                    chassert(scan_mode == ScanMode::TakeWhatsMineByHash);
+                    enqueueSegment(part.description.info, cur_segment, owner);
+                    range.begin += cur_segment.getNumberOfMarks();
+                    if (range.getNumberOfMarks() == 0)
+                        part.description.ranges.pop_front();
+                }
+            }
+        }
+
+        if (!result.ranges.empty())
+            description.push_back(std::move(result));
+    }
+}
+
+void DefaultCoordinator::selectPartsAndRanges(
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    if (scan_mode == ScanMode::TakeWhatsMineByHash)
+    {
+        tryToTakeFromDistributionQueue(replica_num, min_number_of_marks, current_marks_amount, description);
+        processPartsFurther(replica_num, scan_mode, min_number_of_marks, current_marks_amount, description);
+        /// We might back-fill `distribution_by_hash_queue` for this replica in `enqueueToStealerOrStealingQueue`
+        tryToTakeFromDistributionQueue(replica_num, min_number_of_marks, current_marks_amount, description);
+    }
+    else
+        tryToStealFromQueues(replica_num, scan_mode, min_number_of_marks, current_marks_amount, description);
+}
+
+bool DefaultCoordinator::possiblyCanReadPart(size_t replica, const MergeTreePartInfo & info) const
+{
+    /// At this point we might not be sure if `owner` can read from the given part.
+    /// Then we will check it while processing `owner`'s data requests - they are guaranteed to come after the announcement.
+    return !stats[replica].is_unavailable && !replica_status[replica].is_finished
+        && (!replica_status[replica].is_announcement_received || part_visibility.at(info.getPartNameV1()).contains(replica));
+}
+
+void DefaultCoordinator::enqueueSegment(const MergeTreePartInfo & info, const MarkRange & segment, size_t owner)
+{
+    if (possiblyCanReadPart(owner, info))
+    {
+        /// TODO: optimize me (maybe we can store something lighter than RangesInDataPartDescription)
+        distribution_by_hash_queue[owner].insert(RangesInDataPartDescription{.info = info, .ranges = {segment}});
+        LOG_TEST(log, "Segment {} is added to its owner's ({}) queue", segment, owner);
+    }
+    else
+        enqueueToStealerOrStealingQueue(info, segment);
+}
+
+void DefaultCoordinator::enqueueToStealerOrStealingQueue(const MergeTreePartInfo & info, const MarkRange & segment)
+{
+    auto && range = RangesInDataPartDescription{.info = info, .ranges = {segment}};
+    const auto stealer_by_hash = computeConsistentHash(
+        info.getPartNameV1(), roundDownToMultiple(segment.begin, mark_segment_size), ScanMode::TakeWhatsMineForStealing);
+    if (possiblyCanReadPart(stealer_by_hash, info))
+    {
+        distribution_by_hash_queue[stealer_by_hash].insert(std::move(range));
+        LOG_TEST(log, "Segment {} is added to its stealer's ({}) queue", segment, stealer_by_hash);
+    }
+    else
+    {
+        ranges_for_stealing_queue.push_back(std::move(range));
+        LOG_TEST(log, "Segment {} is added to stealing queue", segment);
+    }
+}
+
+size_t DefaultCoordinator::computeConsistentHash(const std::string & part_name, size_t segment_begin, ScanMode scan_mode) const
+{
+    chassert(segment_begin % mark_segment_size == 0);
+    auto hash = SipHash();
+    hash.update(part_name);
+    hash.update(segment_begin);
+    hash.update(scan_mode);
+    return ConsistentHashing(hash.get64(), replicas_count);
+}
+
 ParallelReadResponse DefaultCoordinator::handleRequest(ParallelReadRequest request)
 {
     LOG_TRACE(log, "Handling request from replica {}, minimal marks size is {}", request.replica_num, request.min_number_of_marks);
 
-    size_t current_mark_size = 0;
     ParallelReadResponse response;
 
-    /// 1. Try to select from preferred set of parts for current replica
-    selectPartsAndRanges(reading_state[request.replica_num], request.replica_num, request.min_number_of_marks, current_mark_size, response);
+    size_t current_mark_size = 0;
 
-    /// 2. Try to use parts from delayed queue
-    while (!delayed_parts.empty() && current_mark_size < request.min_number_of_marks)
-    {
-        auto part = delayed_parts.front();
-        delayed_parts.pop_front();
-        reading_state[request.replica_num].emplace_back(part);
-        selectPartsAndRanges(reading_state[request.replica_num], request.replica_num, request.min_number_of_marks, current_mark_size, response);
-    }
+    /// 1. Try to select ranges meant for this replica by consistent hash
+    selectPartsAndRanges(
+        request.replica_num, ScanMode::TakeWhatsMineByHash, request.min_number_of_marks, current_mark_size, response.description);
+    const size_t assigned_to_me = current_mark_size;
 
-    /// 3. Try to steal tasks;
-    if (current_mark_size < request.min_number_of_marks)
-    {
-        for (size_t i = 0; i < replicas_count; ++i)
-        {
-            if (i != request.replica_num)
-                selectPartsAndRanges(reading_state[i], request.replica_num, request.min_number_of_marks, current_mark_size, response);
+    /// 2. Try to steal but with caching again (with different key)
+    selectPartsAndRanges(
+        request.replica_num, ScanMode::TakeWhatsMineForStealing, request.min_number_of_marks, current_mark_size, response.description);
+    const size_t stolen_by_hash = current_mark_size - assigned_to_me;
 
-            if (current_mark_size >= request.min_number_of_marks)
-                break;
-        }
-    }
+    /// 3. Try to steal with no preference. We're trying to postpone it as much as possible.
+    if (current_mark_size == 0 && request.replica_num == source_replica_for_parts_snapshot)
+        selectPartsAndRanges(
+            request.replica_num, ScanMode::TakeEverythingAvailable, request.min_number_of_marks, current_mark_size, response.description);
+    const size_t stolen_unassigned = current_mark_size - stolen_by_hash - assigned_to_me;
 
     stats[request.replica_num].number_of_requests += 1;
     stats[request.replica_num].sum_marks += current_mark_size;
 
+    stats[request.replica_num].assigned_to_me += assigned_to_me;
+    stats[request.replica_num].stolen_by_hash += stolen_by_hash;
+    stats[request.replica_num].stolen_unassigned += stolen_unassigned;
+
+    ProfileEvents::increment(ProfileEvents::ParallelReplicasReadAssignedMarks, assigned_to_me);
+    ProfileEvents::increment(ProfileEvents::ParallelReplicasReadUnassignedMarks, stolen_unassigned);
+    ProfileEvents::increment(ProfileEvents::ParallelReplicasReadAssignedForStealingMarks, stolen_by_hash);
+
     if (response.description.empty())
+    {
         response.finish = true;
 
-    LOG_TRACE(log, "Going to respond to replica {} with {}", request.replica_num, response.describe());
+        replica_status[request.replica_num].is_finished = true;
+
+        if (++finished_replicas == replicas_count - unavailable_replicas_count)
+        {
+            /// Nobody will come to process any more data
+
+            if (!ranges_for_stealing_queue.empty())
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Some orphaned segments were left unread");
+
+            for (size_t replica = 0; replica < replicas_count; ++replica)
+                if (!distribution_by_hash_queue[replica].empty())
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty distribution_by_hash_queue for replica {}", replica);
+        }
+    }
+
+    LOG_DEBUG(
+        log,
+        "Going to respond to replica {} with {}; mine_marks={}, stolen_by_hash={}, stolen_rest={}",
+        request.replica_num,
+        response.describe(),
+        assigned_to_me,
+        stolen_by_hash,
+        stolen_unassigned);
+
     return response;
 }
 
@@ -456,6 +849,8 @@ void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRang
         std::sort(ranges.begin(), ranges.end());
     }
 
+    ++stats[announcement.replica_num].number_of_requests;
+
     if (new_rows_to_read > 0)
     {
         Progress progress;
@@ -557,6 +952,8 @@ ParallelReadResponse InOrderCoordinator<mode>::handleRequest(ParallelReadRequest
 
 void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
 {
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasHandleAnnouncementMicroseconds);
+
     std::lock_guard lock(mutex);
 
     if (!pimpl)
@@ -570,6 +967,8 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init
 
 ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request)
 {
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasHandleRequestMicroseconds);
+
     std::lock_guard lock(mutex);
 
     if (!pimpl)
@@ -604,7 +1003,7 @@ void ParallelReplicasReadingCoordinator::initialize()
     switch (mode)
     {
         case CoordinationMode::Default:
-            pimpl = std::make_unique<DefaultCoordinator>(replicas_count);
+            pimpl = std::make_unique<DefaultCoordinator>(replicas_count, mark_segment_size);
             break;
         case CoordinationMode::WithOrder:
             pimpl = std::make_unique<InOrderCoordinator<CoordinationMode::WithOrder>>(replicas_count);
@@ -621,7 +1020,10 @@ void ParallelReplicasReadingCoordinator::initialize()
         pimpl->markReplicaAsUnavailable(replica);
 }
 
-ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_) : replicas_count(replicas_count_) {}
+ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_)
+    : replicas_count(replicas_count_), mark_segment_size(mark_segment_size_)
+{
+}
 
 ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() = default;
 
diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h
index acc265c124f..9cba7d8e8c2 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h
@@ -15,7 +15,7 @@ class ParallelReplicasReadingCoordinator
 public:
     class ImplInterface;
 
-    explicit ParallelReplicasReadingCoordinator(size_t replicas_count_);
+    explicit ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_ = 0);
     ~ParallelReplicasReadingCoordinator();
 
     void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement);
@@ -35,8 +35,8 @@ private:
 
     std::mutex mutex;
     size_t replicas_count{0};
+    size_t mark_segment_size{0};
     CoordinationMode mode{CoordinationMode::Default};
-    std::atomic<bool> initialized{false};
     std::unique_ptr<ImplInterface> pimpl;
     ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation
     std::set<size_t> replicas_used;
diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/__init__.py b/tests/integration/test_parallel_replicas_all_marks_read/__init__.py
similarity index 100%
rename from tests/integration/test_parallel_replicas_distributed_read_from_all/__init__.py
rename to tests/integration/test_parallel_replicas_all_marks_read/__init__.py
diff --git a/tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml
new file mode 100644
index 00000000000..1ad562334f5
--- /dev/null
+++ b/tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml
@@ -0,0 +1,32 @@
+<clickhouse>
+    <remote_servers>
+        <parallel_replicas_with_unavailable_nodes>
+            <shard>
+                <replica>
+                    <host>node0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node3</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node4</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node5</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </parallel_replicas_with_unavailable_nodes>
+    </remote_servers>
+</clickhouse>
diff --git a/tests/integration/test_parallel_replicas_all_marks_read/test.py b/tests/integration/test_parallel_replicas_all_marks_read/test.py
new file mode 100644
index 00000000000..7776ccb0c09
--- /dev/null
+++ b/tests/integration/test_parallel_replicas_all_marks_read/test.py
@@ -0,0 +1,156 @@
+import json
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from random import randint
+
+cluster = ClickHouseCluster(__file__)
+cluster_name = "parallel_replicas_with_unavailable_nodes"
+
+nodes = [
+    cluster.add_instance(
+        f"node{num}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
+    )
+    for num in range(3)
+]
+
+
+@pytest.fixture(scope="module", autouse=True)
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def _create_tables(table_name, table_size, index_granularity):
+    for num in range(len(nodes)):
+        nodes[num].query(f"DROP TABLE IF EXISTS {table_name}")
+
+        nodes[num].query(
+            f"""
+            CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String)
+            Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', '{num}')
+            ORDER BY (key)
+            SETTINGS index_granularity = {index_granularity}
+            """
+        )
+
+    nodes[0].query(
+        f"""
+        INSERT INTO {table_name}
+        SELECT number, toString(number) FROM numbers_mt({table_size})
+        """
+    )
+
+
+def _create_query(query_tmpl, table_name):
+    rand_set = [randint(0, 500) for i in range(42)]
+    return query_tmpl.format(table_name=table_name, rand_set=rand_set)
+
+
+def _get_result_without_parallel_replicas(query):
+    return nodes[0].query(
+        query,
+        settings={
+            "allow_experimental_parallel_reading_from_replicas": 0,
+        },
+    )
+
+
+def _get_result_with_parallel_replicas(
+    query, query_id, cluster_name, parallel_replicas_mark_segment_size
+):
+    return nodes[0].query(
+        query,
+        settings={
+            "allow_experimental_parallel_reading_from_replicas": 2,
+            "max_parallel_replicas": 6,
+            "cluster_for_parallel_replicas": f"{cluster_name}",
+            "parallel_replicas_mark_segment_size": parallel_replicas_mark_segment_size,
+            "query_id": query_id,
+        },
+    )
+
+
+def _get_expected_amount_of_marks_to_read(query):
+    return json.loads(
+        nodes[0].query(
+            f"""
+            EXPLAIN ESTIMATE
+            {query}
+            FORMAT JSONEachRow
+            """
+        )
+    )["marks"]
+
+
+def _get_number_of_marks_read_by_replicas(query_id):
+    nodes[0].query("SYSTEM FLUSH LOGS")
+    return (
+        nodes[0]
+        .query(
+            f"""
+                SELECT sum(
+                    ProfileEvents['ParallelReplicasReadAssignedMarks']
+                    + ProfileEvents['ParallelReplicasReadUnassignedMarks']
+                    + ProfileEvents['ParallelReplicasReadAssignedForStealingMarks']
+                )
+                FROM system.query_log
+                WHERE query_id = '{query_id}'
+                """
+        )
+        .strip()
+    )
+
+
+@pytest.mark.parametrize(
+    "query_tmpl",
+    [
+        "SELECT sum(cityHash64(*)) FROM {table_name}",
+        "SELECT sum(cityHash64(*)) FROM {table_name} WHERE intDiv(key, 100) IN {rand_set}",
+    ],
+)
+@pytest.mark.parametrize(
+    "table_size",
+    [1000, 10000, 100000],
+)
+@pytest.mark.parametrize(
+    "index_granularity",
+    [10, 100],
+)
+@pytest.mark.parametrize(
+    "parallel_replicas_mark_segment_size",
+    [1, 10],
+)
+def test_number_of_marks_read(
+    start_cluster,
+    query_tmpl,
+    table_size,
+    index_granularity,
+    parallel_replicas_mark_segment_size,
+):
+    if nodes[0].is_built_with_sanitizer():
+        pytest.skip("Disabled for sanitizers (too slow)")
+
+    table_name = f"tbl_{len(query_tmpl)}_{cluster_name}_{table_size}_{index_granularity}_{parallel_replicas_mark_segment_size}"
+    _create_tables(table_name, table_size, index_granularity)
+
+    if "where" in query_tmpl.lower():
+        # We need all the replicas to see the same state of parts to make sure that index analysis will pick the same amount of marks for reading
+        # regardless of which replica's state will be chosen as the working set. This should become redundant once we start to always use initiator's snapshot.
+        nodes[0].query(f"OPTIMIZE TABLE {table_name} FINAL", settings={"alter_sync": 2})
+        for node in nodes:
+            node.query(f"SYSTEM SYNC REPLICA {table_name} STRICT")
+
+    query = _create_query(query_tmpl, table_name)
+    query_id = f"{table_name}_{randint(0, 10**9)}"
+
+    assert _get_result_with_parallel_replicas(
+        query, query_id, cluster_name, parallel_replicas_mark_segment_size
+    ) == _get_result_without_parallel_replicas(query)
+
+    assert _get_number_of_marks_read_by_replicas(
+        query_id
+    ) == _get_expected_amount_of_marks_to_read(query)
diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml
deleted file mode 100644
index 02a315479f8..00000000000
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<clickhouse>
-    <remote_servers>
-        <test_single_shard_multiple_replicas>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>n1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n3</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_single_shard_multiple_replicas>
-    </remote_servers>
-</clickhouse>
-
diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
deleted file mode 100644
index 8af7bb12595..00000000000
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ /dev/null
@@ -1,156 +0,0 @@
-import pytest
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-
-nodes = [
-    cluster.add_instance(
-        f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
-    )
-    for i in (1, 2, 3)
-]
-
-
-@pytest.fixture(scope="module", autouse=True)
-def start_cluster():
-    try:
-        cluster.start()
-        yield cluster
-    finally:
-        cluster.shutdown()
-
-
-def create_tables(cluster, table_name):
-    """create replicated tables in special way
-    - each table is populated by equal number of rows
-    - fetches are disabled, so each replica will have different set of rows
-      which enforce parallel replicas read from each replica
-    """
-
-    # create replicated tables
-    for node in nodes:
-        node.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
-
-    nodes[0].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1')
-            ORDER BY (key)"""
-    )
-    nodes[1].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2')
-            ORDER BY (key)"""
-    )
-    nodes[2].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3')
-            ORDER BY (key)"""
-    )
-    # stop merges
-    nodes[0].query(f"system stop merges {table_name}")
-    nodes[1].query(f"system stop merges {table_name}")
-    nodes[2].query(f"system stop merges {table_name}")
-    # stop fetches
-    nodes[0].query(f"system stop fetches {table_name}")
-    nodes[1].query(f"system stop fetches {table_name}")
-    nodes[2].query(f"system stop fetches {table_name}")
-
-    # create distributed table
-    nodes[0].query(f"DROP TABLE IF EXISTS {table_name}_d SYNC")
-    nodes[0].query(
-        f"""
-            CREATE TABLE {table_name}_d AS {table_name}
-            Engine=Distributed(
-                {cluster},
-                currentDatabase(),
-                {table_name},
-                rand()
-            )
-            """
-    )
-
-    # populate data, equal number of rows for each replica
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(20, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(30, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(40, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(50, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-
-    return "60\t0\t59\t1770\n"
-
-
-@pytest.mark.parametrize(
-    "prefer_localhost_replica",
-    [
-        pytest.param(0),
-        pytest.param(1),
-    ],
-)
-def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica):
-    """create and populate table in special way (see create_table()),
-    so parallel replicas will read equal number of rows from each replica
-    """
-
-    cluster = "test_single_shard_multiple_replicas"
-    table_name = "test_table"
-    expected_result = create_tables(cluster, table_name)
-
-    # parallel replicas
-    assert (
-        nodes[0].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
-            settings={
-                "allow_experimental_parallel_reading_from_replicas": 2,
-                "prefer_localhost_replica": prefer_localhost_replica,
-                "max_parallel_replicas": 3,
-            },
-        )
-        == expected_result
-    )
-
-    # check logs for coordinator statistic
-    for n in nodes:
-        n.query("SYSTEM FLUSH LOGS")
-
-    # each replica has 2 distinct parts (non-intersecting with another replicas),
-    # each part less then index granularity, therefore 2 marks for each replica to handle
-    coordinator_statistic = "replica 0 - {requests: 3 marks: 2}; replica 1 - {requests: 3 marks: 2}; replica 2 - {requests: 3 marks: 2}"
-    assert (
-        nodes[0].contains_in_log(coordinator_statistic)
-        or nodes[1].contains_in_log(coordinator_statistic)
-        or nodes[2].contains_in_log(coordinator_statistic)
-    )
-
-    # w/o parallel replicas
-    # start fetches back, otherwise the result will be not as expected
-    nodes[0].query(f"system start fetches {table_name}")
-    nodes[1].query(f"system start fetches {table_name}")
-    nodes[2].query(f"system start fetches {table_name}")
-    # ensure that replica in sync before querying it to get stable result
-    nodes[0].query(f"system start merges {table_name}")
-    nodes[0].query(f"system sync  replica {table_name}")
-    assert (
-        nodes[0].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
-            settings={
-                "allow_experimental_parallel_reading_from_replicas": 0,
-            },
-        )
-        == expected_result
-    )
diff --git a/tests/integration/test_parallel_replicas_working_set/__init__.py b/tests/integration/test_parallel_replicas_working_set/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
deleted file mode 100644
index 02a315479f8..00000000000
--- a/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<clickhouse>
-    <remote_servers>
-        <test_single_shard_multiple_replicas>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>n1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n3</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_single_shard_multiple_replicas>
-    </remote_servers>
-</clickhouse>
-
diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py
deleted file mode 100644
index 0ede9d9b1a5..00000000000
--- a/tests/integration/test_parallel_replicas_working_set/test.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import pytest
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-
-nodes = [
-    cluster.add_instance(
-        f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
-    )
-    for i in (1, 2, 3)
-]
-
-
-@pytest.fixture(scope="module", autouse=True)
-def start_cluster():
-    try:
-        cluster.start()
-        yield cluster
-    finally:
-        cluster.shutdown()
-
-
-def create_tables(cluster, table_name, node_with_covering_part):
-    # create replicated tables
-    for node in nodes:
-        node.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
-
-    nodes[0].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1')
-            ORDER BY (key)"""
-    )
-    nodes[1].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2')
-            ORDER BY (key)"""
-    )
-    nodes[2].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3')
-            ORDER BY (key)"""
-    )
-    # stop merges to keep original parts
-    # stop fetches to keep only parts created on the nodes
-    for i in (0, 1, 2):
-        if i != node_with_covering_part:
-            nodes[i].query(f"system stop fetches {table_name}")
-            nodes[i].query(f"system stop merges {table_name}")
-
-    # populate data, equal number of rows for each replica
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10)",
-    )
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10, 10)"
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(20, 10)"
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(30, 10)"
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(40, 10)"
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(50, 10)"
-    )
-    nodes[node_with_covering_part].query(f"system sync replica {table_name}")
-    nodes[node_with_covering_part].query(f"optimize table {table_name}")
-
-    # check we have expected set of parts
-    expected_active_parts = ""
-    if node_with_covering_part == 0:
-        expected_active_parts = (
-            "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n"
-        )
-
-    if node_with_covering_part == 1:
-        expected_active_parts = (
-            "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n"
-        )
-
-    if node_with_covering_part == 2:
-        expected_active_parts = (
-            "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n"
-        )
-
-    assert (
-        nodes[0].query(
-            f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name"
-        )
-        == expected_active_parts
-    )
-
-
-@pytest.mark.parametrize("node_with_covering_part", [0, 1, 2])
-def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
-    """create and populate table in special way (see create_table()),
-    node_with_covering_part contains all parts merged into one,
-    other nodes contain only parts which are result of insert via the node
-    """
-
-    cluster = "test_single_shard_multiple_replicas"
-    table_name = "test_table"
-    create_tables(cluster, table_name, node_with_covering_part)
-
-    # query result can be one of the following outcomes
-    # (1) query result if parallel replicas working set contains all_0_5_1
-    expected_full_result = "60\t0\t59\t1770\n"
-    expected_results = {expected_full_result}
-
-    # (2) query result if parallel replicas working set DOESN'T contain all_0_5_1
-    if node_with_covering_part == 0:
-        expected_results.add("40\t20\t59\t1580\n")
-    if node_with_covering_part == 1:
-        expected_results.add("40\t0\t59\t1180\n")
-    if node_with_covering_part == 2:
-        expected_results.add("40\t0\t39\t780\n")
-
-    # parallel replicas
-    result = nodes[0].query(
-        f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
-        settings={
-            "allow_experimental_parallel_reading_from_replicas": 2,
-            "prefer_localhost_replica": 0,
-            "max_parallel_replicas": 3,
-            "use_hedged_requests": 0,
-            "cluster_for_parallel_replicas": cluster,
-        },
-    )
-    assert result in expected_results
-
-    # w/o parallel replicas
-    assert (
-        nodes[node_with_covering_part].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
-            settings={
-                "allow_experimental_parallel_reading_from_replicas": 0,
-            },
-        )
-        == expected_full_result
-    )

From 97c222496ffee1d8e685448e5b16185b253213a2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Thu, 4 Jan 2024 00:44:45 +0100
Subject: [PATCH 138/204] Update references

---
 .../0_stateless/00547_named_tuples.reference  |   2 +-
 .../01458_named_tuple_millin.reference        |   4 +-
 .../01532_tuple_with_name_type.reference      |   8 +-
 ...2026_describe_include_subcolumns.reference |  56 ++-
 .../02179_map_cast_to_array.reference         |   2 +-
 .../02286_tuple_numeric_identifier.reference  |   2 +-
 ...26_settings_changes_system_table.reference |   2 +-
 .../02342_analyzer_compound_types.reference   |  24 +-
 .../02378_analyzer_projection_names.reference |  50 +--
 ..._tuple_to_array_schema_inference.reference |   2 +-
 .../02475_bson_each_row_format.reference      |   4 +-
 ...74_infer_objects_as_named_tuples.reference |  20 +-
 ...plete_types_as_strings_inference.reference |   4 +-
 .../02889_print_pretty_type_names.reference   |  34 +-
 .../02890_describe_table_options.reference    | 392 ++++++++++--------
 .../02906_flatten_only_true_nested.reference  |   2 +-
 ...rray_of_unnamed_tuples_inference.reference |   2 +-
 17 files changed, 319 insertions(+), 291 deletions(-)

diff --git a/tests/queries/0_stateless/00547_named_tuples.reference b/tests/queries/0_stateless/00547_named_tuples.reference
index 70cd0054bdd..041ead4ca79 100644
--- a/tests/queries/0_stateless/00547_named_tuples.reference
+++ b/tests/queries/0_stateless/00547_named_tuples.reference
@@ -1 +1 @@
-(1,'Hello')	Tuple(x UInt64, s String)	1	Hello	1	Hello
+(1,'Hello')	Tuple(\n    x UInt64,\n    s String)	1	Hello	1	Hello
diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.reference b/tests/queries/0_stateless/01458_named_tuple_millin.reference
index d6d6d7ae8d4..954dfe36563 100644
--- a/tests/queries/0_stateless/01458_named_tuple_millin.reference
+++ b/tests/queries/0_stateless/01458_named_tuple_millin.reference
@@ -3,10 +3,10 @@ CREATE TABLE default.tuple
     `j` Tuple(a Int8, b String)
 )
 ENGINE = Memory
-j	Tuple(a Int8, b String)					
+j	Tuple(\n    a Int8,\n    b String)					
 CREATE TABLE default.tuple
 (
     `j` Tuple(a Int8, b String)
 )
 ENGINE = Memory
-j	Tuple(a Int8, b String)					
+j	Tuple(\n    a Int8,\n    b String)					
diff --git a/tests/queries/0_stateless/01532_tuple_with_name_type.reference b/tests/queries/0_stateless/01532_tuple_with_name_type.reference
index 8a3e57d9016..66b85f05fa6 100644
--- a/tests/queries/0_stateless/01532_tuple_with_name_type.reference
+++ b/tests/queries/0_stateless/01532_tuple_with_name_type.reference
@@ -1,4 +1,4 @@
-a	Tuple(key String, value String)					
-a	Tuple(Tuple(key String, value String))					
-a	Array(Tuple(key String, value String))					
-a	Tuple(UInt8, Tuple(key String, value String))					
+a	Tuple(\n    key String,\n    value String)					
+a	Tuple(Tuple(\n    key String,\n    value String))					
+a	Array(Tuple(\n    key String,\n    value String))					
+a	Tuple(UInt8, Tuple(\n    key String,\n    value String))					
diff --git a/tests/queries/0_stateless/02026_describe_include_subcolumns.reference b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference
index ba792ea9f74..ac114a03837 100644
--- a/tests/queries/0_stateless/02026_describe_include_subcolumns.reference
+++ b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference
@@ -1,23 +1,33 @@
-┌─name─┬─type────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┐
-│ d    │ Date                                                │              │                    │                         │                  │                      │
-│ n    │ Nullable(String)                                    │              │                    │ It is a nullable column │                  │                      │
-│ arr1 │ Array(UInt32)                                       │              │                    │                         │ ZSTD(1)          │                      │
-│ arr2 │ Array(Array(String))                                │              │                    │                         │                  │ d + toIntervalDay(1) │
-│ t    │ Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) │              │                    │                         │ ZSTD(1)          │                      │
-└──────┴─────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┘
-┌─name───────┬─type────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┬─is_subcolumn─┐
-│ d          │ Date                                                │              │                    │                         │                  │                      │            0 │
-│ n          │ Nullable(String)                                    │              │                    │ It is a nullable column │                  │                      │            0 │
-│ arr1       │ Array(UInt32)                                       │              │                    │                         │ ZSTD(1)          │                      │            0 │
-│ arr2       │ Array(Array(String))                                │              │                    │                         │                  │ d + toIntervalDay(1) │            0 │
-│ t          │ Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) │              │                    │                         │ ZSTD(1)          │                      │            0 │
-│ n.null     │ UInt8                                               │              │                    │ It is a nullable column │                  │                      │            1 │
-│ arr1.size0 │ UInt64                                              │              │                    │                         │                  │                      │            1 │
-│ arr2.size0 │ UInt64                                              │              │                    │                         │                  │ d + toIntervalDay(1) │            1 │
-│ arr2.size1 │ Array(UInt64)                                       │              │                    │                         │                  │ d + toIntervalDay(1) │            1 │
-│ t.s        │ String                                              │              │                    │                         │ ZSTD(1)          │                      │            1 │
-│ t.a        │ Array(Tuple(a UInt32, b UInt32))                    │              │                    │                         │                  │                      │            1 │
-│ t.a.size0  │ UInt64                                              │              │                    │                         │                  │                      │            1 │
-│ t.a.a      │ Array(UInt32)                                       │              │                    │                         │ ZSTD(1)          │                      │            1 │
-│ t.a.b      │ Array(UInt32)                                       │              │                    │                         │ ZSTD(1)          │                      │            1 │
-└────────────┴─────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┴──────────────┘
+┌─name─┬─type──────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┐
+│ d    │ Date                                                                      │              │                    │                         │                  │                      │
+│ n    │ Nullable(String)                                                          │              │                    │ It is a nullable column │                  │                      │
+│ arr1 │ Array(UInt32)                                                             │              │                    │                         │ ZSTD(1)          │                      │
+│ arr2 │ Array(Array(String))                                                      │              │                    │                         │                  │ d + toIntervalDay(1) │
+│ t    │ Tuple(
+    s String,
+    a Array(Tuple(
+        a UInt32,
+        b UInt32))) │              │                    │                         │ ZSTD(1)          │                      │
+└──────┴───────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┘
+┌─name───────┬─type──────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┬─is_subcolumn─┐
+│ d          │ Date                                                                      │              │                    │                         │                  │                      │            0 │
+│ n          │ Nullable(String)                                                          │              │                    │ It is a nullable column │                  │                      │            0 │
+│ arr1       │ Array(UInt32)                                                             │              │                    │                         │ ZSTD(1)          │                      │            0 │
+│ arr2       │ Array(Array(String))                                                      │              │                    │                         │                  │ d + toIntervalDay(1) │            0 │
+│ t          │ Tuple(
+    s String,
+    a Array(Tuple(
+        a UInt32,
+        b UInt32))) │              │                    │                         │ ZSTD(1)          │                      │            0 │
+│ n.null     │ UInt8                                                                     │              │                    │ It is a nullable column │                  │                      │            1 │
+│ arr1.size0 │ UInt64                                                                    │              │                    │                         │                  │                      │            1 │
+│ arr2.size0 │ UInt64                                                                    │              │                    │                         │                  │ d + toIntervalDay(1) │            1 │
+│ arr2.size1 │ Array(UInt64)                                                             │              │                    │                         │                  │ d + toIntervalDay(1) │            1 │
+│ t.s        │ String                                                                    │              │                    │                         │ ZSTD(1)          │                      │            1 │
+│ t.a        │ Array(Tuple(
+    a UInt32,
+    b UInt32))                                   │              │                    │                         │                  │                      │            1 │
+│ t.a.size0  │ UInt64                                                                    │              │                    │                         │                  │                      │            1 │
+│ t.a.a      │ Array(UInt32)                                                             │              │                    │                         │ ZSTD(1)          │                      │            1 │
+│ t.a.b      │ Array(UInt32)                                                             │              │                    │                         │ ZSTD(1)          │                      │            1 │
+└────────────┴───────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┴──────────────┘
diff --git a/tests/queries/0_stateless/02179_map_cast_to_array.reference b/tests/queries/0_stateless/02179_map_cast_to_array.reference
index 81bb9fba537..e87d1c69c1b 100644
--- a/tests/queries/0_stateless/02179_map_cast_to_array.reference
+++ b/tests/queries/0_stateless/02179_map_cast_to_array.reference
@@ -6,4 +6,4 @@
 {1:{1:'1234'}}	[(1,{1:1234})]	[(1,{1:1234})]
 {1:{1:'1234'}}	[(1,[(1,'1234')])]	[(1,[(1,'1234')])]
 {1:{1:'1234'}}	[(1,[(1,1234)])]	[(1,[(1,1234)])]
-[(1,'val1'),(2,'val2')]	Array(Tuple(k UInt32, v String))
+[(1,'val1'),(2,'val2')]	Array(Tuple(\n    k UInt32,\n    v String))
diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference
index 5f330409b2a..21348493d1d 100644
--- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference
+++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference
@@ -4,7 +4,7 @@ CREATE TABLE default.t_tuple_numeric\n(\n    `t` Tuple(`1` Tuple(`2` Int32, `3`
 2	3	4
 2	3	4
 2	3	4
-Tuple(`1` Tuple(`2` Int8, `3` Int8), `4` Int8)
+Tuple(\n    `1` Tuple(\n        `2` Int8,\n        `3` Int8),\n    `4` Int8)
 {"t":{"1":{"2":2,"3":3},"4":4}}
 2	3	4
 (('value'))
diff --git a/tests/queries/0_stateless/02326_settings_changes_system_table.reference b/tests/queries/0_stateless/02326_settings_changes_system_table.reference
index c4a3c71edfd..1c8c4fa1880 100644
--- a/tests/queries/0_stateless/02326_settings_changes_system_table.reference
+++ b/tests/queries/0_stateless/02326_settings_changes_system_table.reference
@@ -1,3 +1,3 @@
 version	String					
-changes	Array(Tuple(name String, previous_value String, new_value String, reason String))					
+changes	Array(Tuple(\n    name String,\n    previous_value String,\n    new_value String,\n    reason String))					
 22.5	[('memory_overcommit_ratio_denominator','0','1073741824','Enable memory overcommit feature by default'),('memory_overcommit_ratio_denominator_for_user','0','1073741824','Enable memory overcommit feature by default')]
diff --git a/tests/queries/0_stateless/02342_analyzer_compound_types.reference b/tests/queries/0_stateless/02342_analyzer_compound_types.reference
index 51e0bbe6e92..c384b548473 100644
--- a/tests/queries/0_stateless/02342_analyzer_compound_types.reference
+++ b/tests/queries/0_stateless/02342_analyzer_compound_types.reference
@@ -8,33 +8,33 @@ Constant tuple
 Tuple
 --
 id	UInt64					
-value	Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String)					
+value	Tuple(\n    value_0_level_0 Tuple(\n        value_0_level_1 String,\n        value_1_level_1 String),\n    value_1_level_0 String)					
 0	(('value_0_level_1','value_1_level_1'),'value_1_level_0')
 --
 id	UInt64					
-value	Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String)					
+value	Tuple(\n    value_0_level_0 Tuple(\n        value_0_level_1 String,\n        value_1_level_1 String),\n    value_1_level_0 String)					
 0	(('value_0_level_1','value_1_level_1'),'value_1_level_0')
 --
-value.value_0_level_0	Tuple(value_0_level_1 String, value_1_level_1 String)					
+value.value_0_level_0	Tuple(\n    value_0_level_1 String,\n    value_1_level_1 String)					
 value.value_1_level_0	String					
 ('value_0_level_1','value_1_level_1')	value_1_level_0
 --
-alias_value	Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String)					
-alias_value.value_0_level_0	Tuple(value_0_level_1 String, value_1_level_1 String)					
+alias_value	Tuple(\n    value_0_level_0 Tuple(\n        value_0_level_1 String,\n        value_1_level_1 String),\n    value_1_level_0 String)					
+alias_value.value_0_level_0	Tuple(\n    value_0_level_1 String,\n    value_1_level_1 String)					
 alias_value.value_1_level_0	String					
 (('value_0_level_1','value_1_level_1'),'value_1_level_0')	('value_0_level_1','value_1_level_1')	value_1_level_0
 --
-alias_value	Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String)					
-alias_value.value_0_level_0	Tuple(value_0_level_1 String, value_1_level_1 String)					
+alias_value	Tuple(\n    value_0_level_0 Tuple(\n        value_0_level_1 String,\n        value_1_level_1 String),\n    value_1_level_0 String)					
+alias_value.value_0_level_0	Tuple(\n    value_0_level_1 String,\n    value_1_level_1 String)					
 alias_value.value_1_level_0	String					
 (('value_0_level_1','value_1_level_1'),'value_1_level_0')	('value_0_level_1','value_1_level_1')	value_1_level_0
 --
-alias_value	Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String)					
+alias_value	Tuple(\n    value_0_level_0 Tuple(\n        value_0_level_1 String,\n        value_1_level_1 String),\n    value_1_level_0 String)					
 toString(alias_value.value_0_level_0)	String					
 toString(alias_value.value_1_level_0)	String					
 (('value_0_level_1','value_1_level_1'),'value_1_level_0')	(\'value_0_level_1\',\'value_1_level_1\')	value_1_level_0
 --
-value.value_0_level_0	Tuple(value_0_level_1 String, value_1_level_1 String)					
+value.value_0_level_0	Tuple(\n    value_0_level_1 String,\n    value_1_level_1 String)					
 value.value_1_level_0	String					
 ('value_0_level_1','value_1_level_1')	value_1_level_0
 --
@@ -46,17 +46,17 @@ value.value_0_level_0.value_0_level_1	String
 value.value_0_level_0.value_1_level_1	String					
 value_0_level_1	value_1_level_1
 --
-alias_value	Tuple(value_0_level_1 String, value_1_level_1 String)					
+alias_value	Tuple(\n    value_0_level_1 String,\n    value_1_level_1 String)					
 alias_value.value_0_level_1	String					
 alias_value.value_1_level_1	String					
 ('value_0_level_1','value_1_level_1')	value_0_level_1	value_1_level_1
 --
-alias_value	Tuple(value_0_level_1 String, value_1_level_1 String)					
+alias_value	Tuple(\n    value_0_level_1 String,\n    value_1_level_1 String)					
 alias_value.value_0_level_1	String					
 alias_value.value_1_level_1	String					
 ('value_0_level_1','value_1_level_1')	value_0_level_1	value_1_level_1
 --
-alias_value	Tuple(value_0_level_1 String, value_1_level_1 String)					
+alias_value	Tuple(\n    value_0_level_1 String,\n    value_1_level_1 String)					
 toString(alias_value.value_0_level_1)	String					
 toString(alias_value.value_1_level_1)	String					
 ('value_0_level_1','value_1_level_1')	value_0_level_1	value_1_level_1
diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference
index f8b18e6df15..fd5bc7d4ae8 100644
--- a/tests/queries/0_stateless/02378_analyzer_projection_names.reference
+++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference
@@ -13,7 +13,7 @@ concat(\'Value_1\', \'Value_2\')	String
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)'));
-CAST((1, \'Value\'), \'Tuple (id UInt64, value String)\')	Tuple(id UInt64, value String)					
+CAST((1, \'Value\'), \'Tuple (id UInt64, value String)\')	Tuple(\n    id UInt64,\n    value String)					
 SELECT 'Columns';
 Columns
 DESCRIBE (SELECT test_table.id, test_table.id, id FROM test_table);
@@ -77,45 +77,45 @@ e	String
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.id, a.value);
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 a.id	UInt64					
 a.value	String					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.*);
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 a.id	UInt64					
 a.value	String					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT id);
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 a.value	String					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value);
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 a.id	UInt64					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY toString);
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 toString(a.id)	String					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY x -> toString(x));
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 toString(a.id)	String					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a));
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 tupleElement(a, \'id\')	UInt64					
 tupleElement(a, \'value\')	String					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a) AS b);
-a	Tuple(id UInt64, value String)					
+a	Tuple(\n    id UInt64,\n    value String)					
 b.id	UInt64					
 b.value	String					
 SELECT 'Columns with aliases';
@@ -199,63 +199,63 @@ arrayMap(lambda(tuple(x), toString(id)), [1, 2, 3])	Array(String)
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.*, [1,2,3]));
-compound_value	Tuple(id UInt64)					
+compound_value	Tuple(\n    id UInt64)					
 arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3])	Array(UInt64)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> x, [1,2,3]));
-compound_value	Tuple(id UInt64)					
+compound_value	Tuple(\n    id UInt64)					
 arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3])	Array(UInt64)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY toString, [1,2,3]));
-compound_value	Tuple(id UInt64)					
+compound_value	Tuple(\n    id UInt64)					
 arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3])	Array(String)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> toString(x), [1,2,3]));
-compound_value	Tuple(id UInt64)					
+compound_value	Tuple(\n    id UInt64)					
 arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3])	Array(String)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value, [1,2,3]));
-compound_value	Tuple(id UInt64, value String)					
+compound_value	Tuple(\n    id UInt64,\n    value String)					
 arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3])	Array(UInt64)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> x, [1,2,3]));
-compound_value	Tuple(id UInt64, value String)					
+compound_value	Tuple(\n    id UInt64,\n    value String)					
 arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3])	Array(UInt64)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY toString, [1,2,3]));
-compound_value	Tuple(id UInt64, value String)					
+compound_value	Tuple(\n    id UInt64,\n    value String)					
 arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3])	Array(String)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> toString(x), [1,2,3]));
-compound_value	Tuple(id UInt64, value String)					
+compound_value	Tuple(\n    id UInt64,\n    value String)					
 arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3])	Array(String)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a), [1,2,3]) FROM test_table);
-a	Tuple(id UInt64)					
+a	Tuple(\n    id UInt64)					
 arrayMap(lambda(tuple(x), tupleElement(a, \'id\')), [1, 2, 3])	Array(UInt64)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a) AS untupled_value, [1,2,3]) FROM test_table);
-a	Tuple(id UInt64)					
+a	Tuple(\n    id UInt64)					
 arrayMap(untupled_value, [1, 2, 3])	Array(UInt64)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value, [1,2,3]) FROM test_table);
-a	Tuple(id UInt64)					
+a	Tuple(\n    id UInt64)					
 untupled_value.id	UInt64					
 arrayMap(lambda(tuple(x), untupled_value.id), [1, 2, 3])	Array(UInt64)					
 SELECT '--';
 --
 DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value AS untupled_value_in_lambda, [1,2,3]) FROM test_table);
-a	Tuple(id UInt64)					
+a	Tuple(\n    id UInt64)					
 untupled_value.id	UInt64					
 arrayMap(untupled_value_in_lambda, [1, 2, 3])	Array(UInt64)					
 SELECT 'Standalone lambda';
@@ -285,13 +285,13 @@ arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3])	Array(Nullable(UInt8))
 SELECT '--';
 --
 DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.a, c.b);
-c	Tuple(a UInt8, b UInt8)					
+c	Tuple(\n    a UInt8,\n    b UInt8)					
 c.a	UInt8					
 c.b	UInt8					
 SELECT '--';
 --
 DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.*);
-c	Tuple(a UInt8, b UInt8)					
+c	Tuple(\n    a UInt8,\n    b UInt8)					
 c.a	UInt8					
 c.b	UInt8					
 SELECT '--';
@@ -311,13 +311,13 @@ arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3])	Array(Nullable(UInt8))
 SELECT '--';
 --
 DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.a, c.b);
-c	Tuple(a UInt8, b UInt8)					
+c	Tuple(\n    a UInt8,\n    b UInt8)					
 c.a	UInt8					
 c.b	UInt8					
 SELECT '--';
 --
 DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.*);
-c	Tuple(a UInt8, b UInt8)					
+c	Tuple(\n    a UInt8,\n    b UInt8)					
 c.a	UInt8					
 c.b	UInt8					
 SELECT '--';
diff --git a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference
index 57cafb6c8e0..3f4eeac37b3 100644
--- a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference
+++ b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference
@@ -1,3 +1,3 @@
 x	Array(Array(Nullable(Int64)))					
 x	Tuple(Array(Array(Nullable(Int64))), Nullable(Int64))					
-x	Tuple(key Array(Nullable(Int64)))					
+x	Tuple(\n    key Array(Nullable(Int64)))					
diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.reference b/tests/queries/0_stateless/02475_bson_each_row_format.reference
index f90867d92b1..5659e5201b1 100644
--- a/tests/queries/0_stateless/02475_bson_each_row_format.reference
+++ b/tests/queries/0_stateless/02475_bson_each_row_format.reference
@@ -166,7 +166,7 @@ Tuple
 ('Hello',4)
 OK
 OK
-tuple	Tuple(x Nullable(Int64), s Nullable(String))					
+tuple	Tuple(\n    x Nullable(Int64),\n    s Nullable(String))					
 (0,'Hello')
 (1,'Hello')
 (2,'Hello')
@@ -214,7 +214,7 @@ Nested types
 [[0,1,2],[0,1,2,3]]	((3,'Hello'),'Hello')	{'a':{'a.a':3,'a.b':4},'b':{'b.a':3,'b.b':4}}
 [[0,1,2,3],[0,1,2,3,4]]	((4,'Hello'),'Hello')	{'a':{'a.a':4,'a.b':5},'b':{'b.a':4,'b.b':5}}
 nested1	Array(Array(Nullable(Int64)))					
-nested2	Tuple(Tuple(x Nullable(Int64), s Nullable(String)), Nullable(String))					
+nested2	Tuple(Tuple(\n    x Nullable(Int64),\n    s Nullable(String)), Nullable(String))					
 nested3	Map(String, Map(String, Nullable(Int64)))					
 [[],[0]]	((0,'Hello'),'Hello')	{'a':{'a.a':0,'a.b':1},'b':{'b.a':0,'b.b':1}}
 [[0],[0,1]]	((1,'Hello'),'Hello')	{'a':{'a.a':1,'a.b':2},'b':{'b.a':1,'b.b':2}}
diff --git a/tests/queries/0_stateless/02874_infer_objects_as_named_tuples.reference b/tests/queries/0_stateless/02874_infer_objects_as_named_tuples.reference
index 01ef288d81a..06c152a0a3c 100644
--- a/tests/queries/0_stateless/02874_infer_objects_as_named_tuples.reference
+++ b/tests/queries/0_stateless/02874_infer_objects_as_named_tuples.reference
@@ -1,34 +1,34 @@
-obj	Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)))					
+obj	Tuple(\n    a Nullable(Int64),\n    b Nullable(String),\n    c Array(Nullable(Int64)))					
 (42,'Hello',[1,2,3])
-obj	Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Nullable(Date))					
+obj	Tuple(\n    a Nullable(Int64),\n    b Nullable(String),\n    c Array(Nullable(Int64)),\n    d Nullable(Date))					
 (42,'Hello',[1,2,3],NULL)
 (43,'World',[],'2020-01-01')
-obj	Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Nullable(Date))					
+obj	Tuple(\n    a Nullable(Int64),\n    b Nullable(String),\n    c Array(Nullable(Int64)),\n    d Nullable(Date))					
 (42,'Hello',[1,2,3],NULL)
 (43,'World',[],'2020-01-01')
 (NULL,NULL,[],NULL)
-obj	Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Nullable(String))					
+obj	Tuple(\n    a Nullable(Int64),\n    b Nullable(String),\n    c Array(Nullable(Int64)),\n    d Nullable(String))					
 (42,'Hello',[1,2,3],NULL)
 (43,'World',[],'2020-01-01')
 (NULL,NULL,[],NULL)
 (NULL,'2020-01-01',[],'Hello')
-obj	Array(Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Nullable(Date)))					
+obj	Array(Tuple(\n    a Nullable(Int64),\n    b Nullable(String),\n    c Array(Nullable(Int64)),\n    d Nullable(Date)))					
 [(42,'Hello',[1,2,3],NULL),(43,'World',[],'2020-01-01')]
 [(NULL,NULL,[],NULL)]
-obj	Tuple(nested_obj Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Nullable(Date)))					
+obj	Tuple(\n    nested_obj Tuple(\n        a Nullable(Int64),\n        b Nullable(String),\n        c Array(Nullable(Int64)),\n        d Nullable(Date)))					
 ((42,'Hello',[1,2,3],NULL))
 ((43,'World',[],'2020-01-01'))
 ((NULL,NULL,[],NULL))
-obj	Tuple(a Tuple(b Nullable(Int64)), `a.b` Nullable(Int64), `a.b.c` Nullable(String))					
+obj	Tuple(\n    a Tuple(\n        b Nullable(Int64)),\n    `a.b` Nullable(Int64),\n    `a.b.c` Nullable(String))					
 ((1),NULL,NULL)
 ((NULL),2,'Hello')
-obj	Tuple(a Tuple(b Tuple(c Nullable(Int64))))					
+obj	Tuple(\n    a Tuple(\n        b Tuple(\n            c Nullable(Int64))))					
 (((NULL)))
 (((10)))
-obj	Tuple(a Nullable(String))					
+obj	Tuple(\n    a Nullable(String))					
 ('{}')
 obj	Nullable(String)					
 {}
-obj	Tuple(a Array(Tuple(b Array(Nullable(Int64)), c Tuple(d Nullable(Int64)), e Nullable(String))))					
+obj	Tuple(\n    a Array(Tuple(\n        b Array(Nullable(Int64)),\n        c Tuple(\n            d Nullable(Int64)),\n        e Nullable(String))))					
 ([([],(NULL),NULL),([],(NULL),NULL),([],(10),NULL)])
 ([([1,2,3],(NULL),'Hello')])
diff --git a/tests/queries/0_stateless/02876_json_incomplete_types_as_strings_inference.reference b/tests/queries/0_stateless/02876_json_incomplete_types_as_strings_inference.reference
index db94ffc9466..b904568391b 100644
--- a/tests/queries/0_stateless/02876_json_incomplete_types_as_strings_inference.reference
+++ b/tests/queries/0_stateless/02876_json_incomplete_types_as_strings_inference.reference
@@ -2,6 +2,6 @@ a	Nullable(String)
 b	Nullable(String)					
 c	Array(Nullable(String))					
 \N	{}	[]
-a	Tuple(b Nullable(String), c Array(Array(Nullable(String))))					
-d	Tuple(e Array(Nullable(String)), f Nullable(String))					
+a	Tuple(\n    b Nullable(String),\n    c Array(Array(Nullable(String))))					
+d	Tuple(\n    e Array(Nullable(String)),\n    f Nullable(String))					
 (NULL,[[],[]])	(['{}','{}'],NULL)
diff --git a/tests/queries/0_stateless/02889_print_pretty_type_names.reference b/tests/queries/0_stateless/02889_print_pretty_type_names.reference
index ea25df165bb..9af8e0142f8 100644
--- a/tests/queries/0_stateless/02889_print_pretty_type_names.reference
+++ b/tests/queries/0_stateless/02889_print_pretty_type_names.reference
@@ -5,18 +5,11 @@ a	Tuple(
         e Array(UInt32),
         f Array(Tuple(
             g String,
-            h Map(
-                String,
-                Array(Tuple(
-                    i String,
-                    j UInt64
-                ))
-            )
-        )),
-        k Date
-    ),
-    l Nullable(String)
-)					
+            h Map(String, Array(Tuple(
+                i String,
+                j UInt64))))),
+        k Date),
+    l Nullable(String))					
 Tuple(
     b String,
     c Tuple(
@@ -24,15 +17,8 @@ Tuple(
         e Array(UInt32),
         f Array(Tuple(
             g String,
-            h Map(
-                String,
-                Array(Tuple(
-                    i String,
-                    j UInt64
-                ))
-            )
-        )),
-        k Date
-    ),
-    l Nullable(String)
-)
+            h Map(String, Array(Tuple(
+                i String,
+                j UInt64))))),
+        k Date),
+    l Nullable(String))
diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference
index 2974fd92f3c..5d99df36bb4 100644
--- a/tests/queries/0_stateless/02890_describe_table_options.reference
+++ b/tests/queries/0_stateless/02890_describe_table_options.reference
@@ -2,205 +2,237 @@
 
 SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 0;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐
-│ id   │ UInt64                    │              │                    │ index column │                  │                │
-│ arr  │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │
-│ t    │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │
-└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘
+┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐
+│ id   │ UInt64                           │              │                    │ index column │                  │                │
+│ arr  │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │
+│ t    │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │
+└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐
-│ id   │ UInt64                    │              │                    │ index column │                  │                │
-│ arr  │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │
-│ t    │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │
-└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘
+┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐
+│ id   │ UInt64                           │              │                    │ index column │                  │                │
+│ arr  │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │
+│ t    │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │
+└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘
 SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 1;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
-│ id        │ UInt64                    │              │                    │ index column │                  │                │            0 │
-│ arr       │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │
-│ t         │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │
-│ arr.size0 │ UInt64                    │              │                    │              │                  │                │            1 │
-│ t.a       │ String                    │              │                    │              │ ZSTD(1)          │                │            1 │
-│ t.b       │ UInt64                    │              │                    │              │ ZSTD(1)          │                │            1 │
-└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘
+┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
+│ id        │ UInt64                           │              │                    │ index column │                  │                │            0 │
+│ arr       │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │
+│ t         │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │
+│ arr.size0 │ UInt64                           │              │                    │              │                  │                │            1 │
+│ t.a       │ String                           │              │                    │              │ ZSTD(1)          │                │            1 │
+│ t.b       │ UInt64                           │              │                    │              │ ZSTD(1)          │                │            1 │
+└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
-│ id        │ UInt64                    │              │                    │ index column │                  │                │            0 │
-│ arr       │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │
-│ t         │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │
-│ arr.size0 │ UInt64                    │              │                    │              │                  │                │            1 │
-│ t.a       │ String                    │              │                    │              │ ZSTD(1)          │                │            1 │
-│ t.b       │ UInt64                    │              │                    │              │ ZSTD(1)          │                │            1 │
-└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘
+┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
+│ id        │ UInt64                           │              │                    │ index column │                  │                │            0 │
+│ arr       │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │
+│ t         │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │
+│ arr.size0 │ UInt64                           │              │                    │              │                  │                │            1 │
+│ t.a       │ String                           │              │                    │              │ ZSTD(1)          │                │            1 │
+│ t.b       │ UInt64                           │              │                    │              │ ZSTD(1)          │                │            1 │
+└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘
 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 0;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name─────────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐
-│ id               │ UInt64                    │              │                    │ index column │                  │                │          0 │
-│ arr              │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │          0 │
-│ t                │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │          0 │
-│ _part            │ LowCardinality(String)    │              │                    │              │                  │                │          1 │
-│ _part_index      │ UInt64                    │              │                    │              │                  │                │          1 │
-│ _part_uuid       │ UUID                      │              │                    │              │                  │                │          1 │
-│ _partition_id    │ LowCardinality(String)    │              │                    │              │                  │                │          1 │
-│ _partition_value │ UInt8                     │              │                    │              │                  │                │          1 │
-│ _sample_factor   │ Float64                   │              │                    │              │                  │                │          1 │
-│ _part_offset     │ UInt64                    │              │                    │              │                  │                │          1 │
-│ _row_exists      │ UInt8                     │              │                    │              │                  │                │          1 │
-│ _block_number    │ UInt64                    │              │                    │              │                  │                │          1 │
-└──────────────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘
+┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐
+│ id               │ UInt64                           │              │                    │ index column │                  │                │          0 │
+│ arr              │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │          0 │
+│ t                │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │          0 │
+│ _part            │ LowCardinality(String)           │              │                    │              │                  │                │          1 │
+│ _part_index      │ UInt64                           │              │                    │              │                  │                │          1 │
+│ _part_uuid       │ UUID                             │              │                    │              │                  │                │          1 │
+│ _partition_id    │ LowCardinality(String)           │              │                    │              │                  │                │          1 │
+│ _partition_value │ UInt8                            │              │                    │              │                  │                │          1 │
+│ _sample_factor   │ Float64                          │              │                    │              │                  │                │          1 │
+│ _part_offset     │ UInt64                           │              │                    │              │                  │                │          1 │
+│ _row_exists      │ UInt8                            │              │                    │              │                  │                │          1 │
+│ _block_number    │ UInt64                           │              │                    │              │                  │                │          1 │
+└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐
-│ id             │ UInt64                    │              │                    │ index column │                  │                │          0 │
-│ arr            │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │          0 │
-│ t              │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │          0 │
-│ _table         │ LowCardinality(String)    │              │                    │              │                  │                │          1 │
-│ _part          │ LowCardinality(String)    │              │                    │              │                  │                │          1 │
-│ _part_index    │ UInt64                    │              │                    │              │                  │                │          1 │
-│ _part_uuid     │ UUID                      │              │                    │              │                  │                │          1 │
-│ _partition_id  │ LowCardinality(String)    │              │                    │              │                  │                │          1 │
-│ _sample_factor │ Float64                   │              │                    │              │                  │                │          1 │
-│ _part_offset   │ UInt64                    │              │                    │              │                  │                │          1 │
-│ _row_exists    │ UInt8                     │              │                    │              │                  │                │          1 │
-│ _block_number  │ UInt64                    │              │                    │              │                  │                │          1 │
-│ _shard_num     │ UInt32                    │              │                    │              │                  │                │          1 │
-└────────────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘
+┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐
+│ id             │ UInt64                           │              │                    │ index column │                  │                │          0 │
+│ arr            │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │          0 │
+│ t              │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │          0 │
+│ _table         │ LowCardinality(String)           │              │                    │              │                  │                │          1 │
+│ _part          │ LowCardinality(String)           │              │                    │              │                  │                │          1 │
+│ _part_index    │ UInt64                           │              │                    │              │                  │                │          1 │
+│ _part_uuid     │ UUID                             │              │                    │              │                  │                │          1 │
+│ _partition_id  │ LowCardinality(String)           │              │                    │              │                  │                │          1 │
+│ _sample_factor │ Float64                          │              │                    │              │                  │                │          1 │
+│ _part_offset   │ UInt64                           │              │                    │              │                  │                │          1 │
+│ _row_exists    │ UInt8                            │              │                    │              │                  │                │          1 │
+│ _block_number  │ UInt64                           │              │                    │              │                  │                │          1 │
+│ _shard_num     │ UInt32                           │              │                    │              │                  │                │          1 │
+└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘
 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name─────────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐
-│ id               │ UInt64                    │              │                    │ index column │                  │                │            0 │          0 │
-│ arr              │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │          0 │
-│ t                │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │          0 │
-│ _part            │ LowCardinality(String)    │              │                    │              │                  │                │            0 │          1 │
-│ _part_index      │ UInt64                    │              │                    │              │                  │                │            0 │          1 │
-│ _part_uuid       │ UUID                      │              │                    │              │                  │                │            0 │          1 │
-│ _partition_id    │ LowCardinality(String)    │              │                    │              │                  │                │            0 │          1 │
-│ _partition_value │ UInt8                     │              │                    │              │                  │                │            0 │          1 │
-│ _sample_factor   │ Float64                   │              │                    │              │                  │                │            0 │          1 │
-│ _part_offset     │ UInt64                    │              │                    │              │                  │                │            0 │          1 │
-│ _row_exists      │ UInt8                     │              │                    │              │                  │                │            0 │          1 │
-│ _block_number    │ UInt64                    │              │                    │              │                  │                │            0 │          1 │
-│ arr.size0        │ UInt64                    │              │                    │              │                  │                │            1 │          0 │
-│ t.a              │ String                    │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
-│ t.b              │ UInt64                    │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
-└──────────────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘
+┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐
+│ id               │ UInt64                           │              │                    │ index column │                  │                │            0 │          0 │
+│ arr              │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │          0 │
+│ t                │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │          0 │
+│ _part            │ LowCardinality(String)           │              │                    │              │                  │                │            0 │          1 │
+│ _part_index      │ UInt64                           │              │                    │              │                  │                │            0 │          1 │
+│ _part_uuid       │ UUID                             │              │                    │              │                  │                │            0 │          1 │
+│ _partition_id    │ LowCardinality(String)           │              │                    │              │                  │                │            0 │          1 │
+│ _partition_value │ UInt8                            │              │                    │              │                  │                │            0 │          1 │
+│ _sample_factor   │ Float64                          │              │                    │              │                  │                │            0 │          1 │
+│ _part_offset     │ UInt64                           │              │                    │              │                  │                │            0 │          1 │
+│ _row_exists      │ UInt8                            │              │                    │              │                  │                │            0 │          1 │
+│ _block_number    │ UInt64                           │              │                    │              │                  │                │            0 │          1 │
+│ arr.size0        │ UInt64                           │              │                    │              │                  │                │            1 │          0 │
+│ t.a              │ String                           │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
+│ t.b              │ UInt64                           │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
+└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐
-│ id             │ UInt64                    │              │                    │ index column │                  │                │            0 │          0 │
-│ arr            │ Array(UInt64)             │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │          0 │
-│ t              │ Tuple(a String, b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │          0 │
-│ _table         │ LowCardinality(String)    │              │                    │              │                  │                │            0 │          1 │
-│ _part          │ LowCardinality(String)    │              │                    │              │                  │                │            0 │          1 │
-│ _part_index    │ UInt64                    │              │                    │              │                  │                │            0 │          1 │
-│ _part_uuid     │ UUID                      │              │                    │              │                  │                │            0 │          1 │
-│ _partition_id  │ LowCardinality(String)    │              │                    │              │                  │                │            0 │          1 │
-│ _sample_factor │ Float64                   │              │                    │              │                  │                │            0 │          1 │
-│ _part_offset   │ UInt64                    │              │                    │              │                  │                │            0 │          1 │
-│ _row_exists    │ UInt8                     │              │                    │              │                  │                │            0 │          1 │
-│ _block_number  │ UInt64                    │              │                    │              │                  │                │            0 │          1 │
-│ _shard_num     │ UInt32                    │              │                    │              │                  │                │            0 │          1 │
-│ arr.size0      │ UInt64                    │              │                    │              │                  │                │            1 │          0 │
-│ t.a            │ String                    │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
-│ t.b            │ UInt64                    │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
-└────────────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘
+┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐
+│ id             │ UInt64                           │              │                    │ index column │                  │                │            0 │          0 │
+│ arr            │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │            0 │          0 │
+│ t              │ Tuple(
+    a String,
+    b UInt64) │ DEFAULT      │ ('foo', 0)         │              │ ZSTD(1)          │                │            0 │          0 │
+│ _table         │ LowCardinality(String)           │              │                    │              │                  │                │            0 │          1 │
+│ _part          │ LowCardinality(String)           │              │                    │              │                  │                │            0 │          1 │
+│ _part_index    │ UInt64                           │              │                    │              │                  │                │            0 │          1 │
+│ _part_uuid     │ UUID                             │              │                    │              │                  │                │            0 │          1 │
+│ _partition_id  │ LowCardinality(String)           │              │                    │              │                  │                │            0 │          1 │
+│ _sample_factor │ Float64                          │              │                    │              │                  │                │            0 │          1 │
+│ _part_offset   │ UInt64                           │              │                    │              │                  │                │            0 │          1 │
+│ _row_exists    │ UInt8                            │              │                    │              │                  │                │            0 │          1 │
+│ _block_number  │ UInt64                           │              │                    │              │                  │                │            0 │          1 │
+│ _shard_num     │ UInt32                           │              │                    │              │                  │                │            0 │          1 │
+│ arr.size0      │ UInt64                           │              │                    │              │                  │                │            1 │          0 │
+│ t.a            │ String                           │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
+│ t.b            │ UInt64                           │              │                    │              │ ZSTD(1)          │                │            1 │          0 │
+└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘
 SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 0;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name─┬─type──────────────────────┐
-│ id   │ UInt64                    │
-│ arr  │ Array(UInt64)             │
-│ t    │ Tuple(a String, b UInt64) │
-└──────┴───────────────────────────┘
+┌─name─┬─type─────────────────────────────┐
+│ id   │ UInt64                           │
+│ arr  │ Array(UInt64)                    │
+│ t    │ Tuple(
+    a String,
+    b UInt64) │
+└──────┴──────────────────────────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name─┬─type──────────────────────┐
-│ id   │ UInt64                    │
-│ arr  │ Array(UInt64)             │
-│ t    │ Tuple(a String, b UInt64) │
-└──────┴───────────────────────────┘
+┌─name─┬─type─────────────────────────────┐
+│ id   │ UInt64                           │
+│ arr  │ Array(UInt64)                    │
+│ t    │ Tuple(
+    a String,
+    b UInt64) │
+└──────┴──────────────────────────────────┘
 SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 1;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name──────┬─type──────────────────────┬─is_subcolumn─┐
-│ id        │ UInt64                    │            0 │
-│ arr       │ Array(UInt64)             │            0 │
-│ t         │ Tuple(a String, b UInt64) │            0 │
-│ arr.size0 │ UInt64                    │            1 │
-│ t.a       │ String                    │            1 │
-│ t.b       │ UInt64                    │            1 │
-└───────────┴───────────────────────────┴──────────────┘
+┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┐
+│ id        │ UInt64                           │            0 │
+│ arr       │ Array(UInt64)                    │            0 │
+│ t         │ Tuple(
+    a String,
+    b UInt64) │            0 │
+│ arr.size0 │ UInt64                           │            1 │
+│ t.a       │ String                           │            1 │
+│ t.b       │ UInt64                           │            1 │
+└───────────┴──────────────────────────────────┴──────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name──────┬─type──────────────────────┬─is_subcolumn─┐
-│ id        │ UInt64                    │            0 │
-│ arr       │ Array(UInt64)             │            0 │
-│ t         │ Tuple(a String, b UInt64) │            0 │
-│ arr.size0 │ UInt64                    │            1 │
-│ t.a       │ String                    │            1 │
-│ t.b       │ UInt64                    │            1 │
-└───────────┴───────────────────────────┴──────────────┘
+┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┐
+│ id        │ UInt64                           │            0 │
+│ arr       │ Array(UInt64)                    │            0 │
+│ t         │ Tuple(
+    a String,
+    b UInt64) │            0 │
+│ arr.size0 │ UInt64                           │            1 │
+│ t.a       │ String                           │            1 │
+│ t.b       │ UInt64                           │            1 │
+└───────────┴──────────────────────────────────┴──────────────┘
 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 0;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name─────────────┬─type──────────────────────┬─is_virtual─┐
-│ id               │ UInt64                    │          0 │
-│ arr              │ Array(UInt64)             │          0 │
-│ t                │ Tuple(a String, b UInt64) │          0 │
-│ _part            │ LowCardinality(String)    │          1 │
-│ _part_index      │ UInt64                    │          1 │
-│ _part_uuid       │ UUID                      │          1 │
-│ _partition_id    │ LowCardinality(String)    │          1 │
-│ _partition_value │ UInt8                     │          1 │
-│ _sample_factor   │ Float64                   │          1 │
-│ _part_offset     │ UInt64                    │          1 │
-│ _row_exists      │ UInt8                     │          1 │
-│ _block_number    │ UInt64                    │          1 │
-└──────────────────┴───────────────────────────┴────────────┘
+┌─name─────────────┬─type─────────────────────────────┬─is_virtual─┐
+│ id               │ UInt64                           │          0 │
+│ arr              │ Array(UInt64)                    │          0 │
+│ t                │ Tuple(
+    a String,
+    b UInt64) │          0 │
+│ _part            │ LowCardinality(String)           │          1 │
+│ _part_index      │ UInt64                           │          1 │
+│ _part_uuid       │ UUID                             │          1 │
+│ _partition_id    │ LowCardinality(String)           │          1 │
+│ _partition_value │ UInt8                            │          1 │
+│ _sample_factor   │ Float64                          │          1 │
+│ _part_offset     │ UInt64                           │          1 │
+│ _row_exists      │ UInt8                            │          1 │
+│ _block_number    │ UInt64                           │          1 │
+└──────────────────┴──────────────────────────────────┴────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name───────────┬─type──────────────────────┬─is_virtual─┐
-│ id             │ UInt64                    │          0 │
-│ arr            │ Array(UInt64)             │          0 │
-│ t              │ Tuple(a String, b UInt64) │          0 │
-│ _table         │ LowCardinality(String)    │          1 │
-│ _part          │ LowCardinality(String)    │          1 │
-│ _part_index    │ UInt64                    │          1 │
-│ _part_uuid     │ UUID                      │          1 │
-│ _partition_id  │ LowCardinality(String)    │          1 │
-│ _sample_factor │ Float64                   │          1 │
-│ _part_offset   │ UInt64                    │          1 │
-│ _row_exists    │ UInt8                     │          1 │
-│ _block_number  │ UInt64                    │          1 │
-│ _shard_num     │ UInt32                    │          1 │
-└────────────────┴───────────────────────────┴────────────┘
+┌─name───────────┬─type─────────────────────────────┬─is_virtual─┐
+│ id             │ UInt64                           │          0 │
+│ arr            │ Array(UInt64)                    │          0 │
+│ t              │ Tuple(
+    a String,
+    b UInt64) │          0 │
+│ _table         │ LowCardinality(String)           │          1 │
+│ _part          │ LowCardinality(String)           │          1 │
+│ _part_index    │ UInt64                           │          1 │
+│ _part_uuid     │ UUID                             │          1 │
+│ _partition_id  │ LowCardinality(String)           │          1 │
+│ _sample_factor │ Float64                          │          1 │
+│ _part_offset   │ UInt64                           │          1 │
+│ _row_exists    │ UInt8                            │          1 │
+│ _block_number  │ UInt64                           │          1 │
+│ _shard_num     │ UInt32                           │          1 │
+└────────────────┴──────────────────────────────────┴────────────┘
 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name─────────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐
-│ id               │ UInt64                    │            0 │          0 │
-│ arr              │ Array(UInt64)             │            0 │          0 │
-│ t                │ Tuple(a String, b UInt64) │            0 │          0 │
-│ _part            │ LowCardinality(String)    │            0 │          1 │
-│ _part_index      │ UInt64                    │            0 │          1 │
-│ _part_uuid       │ UUID                      │            0 │          1 │
-│ _partition_id    │ LowCardinality(String)    │            0 │          1 │
-│ _partition_value │ UInt8                     │            0 │          1 │
-│ _sample_factor   │ Float64                   │            0 │          1 │
-│ _part_offset     │ UInt64                    │            0 │          1 │
-│ _row_exists      │ UInt8                     │            0 │          1 │
-│ _block_number    │ UInt64                    │            0 │          1 │
-│ arr.size0        │ UInt64                    │            1 │          0 │
-│ t.a              │ String                    │            1 │          0 │
-│ t.b              │ UInt64                    │            1 │          0 │
-└──────────────────┴───────────────────────────┴──────────────┴────────────┘
+┌─name─────────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┐
+│ id               │ UInt64                           │            0 │          0 │
+│ arr              │ Array(UInt64)                    │            0 │          0 │
+│ t                │ Tuple(
+    a String,
+    b UInt64) │            0 │          0 │
+│ _part            │ LowCardinality(String)           │            0 │          1 │
+│ _part_index      │ UInt64                           │            0 │          1 │
+│ _part_uuid       │ UUID                             │            0 │          1 │
+│ _partition_id    │ LowCardinality(String)           │            0 │          1 │
+│ _partition_value │ UInt8                            │            0 │          1 │
+│ _sample_factor   │ Float64                          │            0 │          1 │
+│ _part_offset     │ UInt64                           │            0 │          1 │
+│ _row_exists      │ UInt8                            │            0 │          1 │
+│ _block_number    │ UInt64                           │            0 │          1 │
+│ arr.size0        │ UInt64                           │            1 │          0 │
+│ t.a              │ String                           │            1 │          0 │
+│ t.b              │ UInt64                           │            1 │          0 │
+└──────────────────┴──────────────────────────────────┴──────────────┴────────────┘
 DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes;
-┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐
-│ id             │ UInt64                    │            0 │          0 │
-│ arr            │ Array(UInt64)             │            0 │          0 │
-│ t              │ Tuple(a String, b UInt64) │            0 │          0 │
-│ _table         │ LowCardinality(String)    │            0 │          1 │
-│ _part          │ LowCardinality(String)    │            0 │          1 │
-│ _part_index    │ UInt64                    │            0 │          1 │
-│ _part_uuid     │ UUID                      │            0 │          1 │
-│ _partition_id  │ LowCardinality(String)    │            0 │          1 │
-│ _sample_factor │ Float64                   │            0 │          1 │
-│ _part_offset   │ UInt64                    │            0 │          1 │
-│ _row_exists    │ UInt8                     │            0 │          1 │
-│ _block_number  │ UInt64                    │            0 │          1 │
-│ _shard_num     │ UInt32                    │            0 │          1 │
-│ arr.size0      │ UInt64                    │            1 │          0 │
-│ t.a            │ String                    │            1 │          0 │
-│ t.b            │ UInt64                    │            1 │          0 │
-└────────────────┴───────────────────────────┴──────────────┴────────────┘
+┌─name───────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┐
+│ id             │ UInt64                           │            0 │          0 │
+│ arr            │ Array(UInt64)                    │            0 │          0 │
+│ t              │ Tuple(
+    a String,
+    b UInt64) │            0 │          0 │
+│ _table         │ LowCardinality(String)           │            0 │          1 │
+│ _part          │ LowCardinality(String)           │            0 │          1 │
+│ _part_index    │ UInt64                           │            0 │          1 │
+│ _part_uuid     │ UUID                             │            0 │          1 │
+│ _partition_id  │ LowCardinality(String)           │            0 │          1 │
+│ _sample_factor │ Float64                          │            0 │          1 │
+│ _part_offset   │ UInt64                           │            0 │          1 │
+│ _row_exists    │ UInt8                            │            0 │          1 │
+│ _block_number  │ UInt64                           │            0 │          1 │
+│ _shard_num     │ UInt32                           │            0 │          1 │
+│ arr.size0      │ UInt64                           │            1 │          0 │
+│ t.a            │ String                           │            1 │          0 │
+│ t.b            │ UInt64                           │            1 │          0 │
+└────────────────┴──────────────────────────────────┴──────────────┴────────────┘
diff --git a/tests/queries/0_stateless/02906_flatten_only_true_nested.reference b/tests/queries/0_stateless/02906_flatten_only_true_nested.reference
index e7a96da8db9..b259b1e4563 100644
--- a/tests/queries/0_stateless/02906_flatten_only_true_nested.reference
+++ b/tests/queries/0_stateless/02906_flatten_only_true_nested.reference
@@ -1,3 +1,3 @@
 data.x	Array(UInt32)					
 data.y	Array(UInt32)					
-data	Array(Tuple(x UInt64, y UInt64))					
+data	Array(Tuple(\n    x UInt64,\n    y UInt64))					
diff --git a/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference b/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference
index aac3e471264..aac8c4f777e 100644
--- a/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference
+++ b/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference
@@ -1 +1 @@
-data	Array(Tuple(Nullable(Int64), Tuple(a Nullable(Int64), b Nullable(Int64)), Nullable(Int64), Nullable(String)))					
+data	Array(Tuple(Nullable(Int64), Tuple(\n    a Nullable(Int64),\n    b Nullable(Int64)), Nullable(Int64), Nullable(String)))					

From 85b4d6c838bde329f34d74feb3d2ffde1de9f1a7 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Thu, 4 Jan 2024 01:46:03 +0000
Subject: [PATCH 139/204] fix shell check

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh
index a88c0ddd5e9..eab44e74d88 100755
--- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh
+++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh
@@ -15,7 +15,7 @@ chmod 777 ${FILE_PATH}
 FILE="test_symlink_${CLICKHOUSE_DATABASE}"
 
 symlink_path=${FILE_PATH}/${FILE}
-symlink_path_with_regex=${FILE_PATH}*/${FILE}
+symlink_path_with_regex="${FILE_PATH}*/${FILE}"
 file_path=$CUR_DIR/${FILE}
 
 touch ${file_path}

From 8f26e2af67614984d1e7db0330f556e1fb584c18 Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Wed, 3 Jan 2024 13:52:08 +0000
Subject: [PATCH 140/204] Even better Keeper startup

---
 src/Coordination/KeeperServer.cpp | 106 ++++++++++++++----------------
 1 file changed, 48 insertions(+), 58 deletions(-)

diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp
index fb56d58cb72..965e743da39 100644
--- a/src/Coordination/KeeperServer.cpp
+++ b/src/Coordination/KeeperServer.cpp
@@ -4,6 +4,7 @@
 #include "config.h"
 
 #include <chrono>
+#include <mutex>
 #include <string>
 #include <Coordination/KeeperStateMachine.h>
 #include <Coordination/KeeperStateManager.h>
@@ -14,6 +15,7 @@
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <boost/algorithm/string.hpp>
+#include <libnuraft/callback.hxx>
 #include <libnuraft/cluster_config.hxx>
 #include <libnuraft/log_val_type.hxx>
 #include <libnuraft/msg_type.hxx>
@@ -196,13 +198,9 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
         nuraft::raft_server::commit_in_bg();
     }
 
-    void commitLogs(uint64_t index_to_commit, bool initial_commit_exec)
+    std::unique_lock<std::recursive_mutex> lockRaft()
     {
-        leader_commit_index_.store(index_to_commit);
-        quick_commit_index_ = index_to_commit;
-        lagging_sm_target_index_ = index_to_commit;
-
-        commit_in_bg_exec(0, initial_commit_exec);
+        return std::unique_lock(lock_);
     }
 
     using nuraft::raft_server::raft_server;
@@ -518,6 +516,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession &
 RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
 {
     std::vector<nuraft::ptr<nuraft::buffer>> entries;
+    entries.reserve(requests_for_sessions.size());
     for (const auto & request_for_session : requests_for_sessions)
         entries.push_back(getZooKeeperLogEntry(request_for_session));
 
@@ -630,32 +629,32 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
     {
         const auto preprocess_logs = [&]
         {
+            auto lock = raft_instance->lockRaft();
             keeper_context->local_logs_preprocessed = true;
             auto log_store = state_manager->load_log_store();
-            if (last_log_idx_on_disk > 0 && last_log_idx_on_disk > state_machine->last_commit_index())
-            {
-                auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, last_log_idx_on_disk + 1);
+            auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, log_store->next_slot());
 
-                size_t preprocessed = 0;
-                LOG_INFO(log, "Preprocessing {} log entries", log_entries->size());
-                auto idx = state_machine->last_commit_index() + 1;
-                for (const auto & entry : *log_entries)
-                {
-                    if (entry && entry->get_val_type() == nuraft::log_val_type::app_log)
-                        state_machine->pre_commit(idx, entry->get_buf());
-
-                    ++idx;
-                    ++preprocessed;
-
-                    if (preprocessed % 50000 == 0)
-                        LOG_TRACE(log, "Preprocessed {}/{} entries", preprocessed, log_entries->size());
-                }
-                LOG_INFO(log, "Preprocessing done");
-            }
-            else
+            if (log_entries->empty())
             {
                 LOG_INFO(log, "All local log entries preprocessed");
+                return;
             }
+
+            size_t preprocessed = 0;
+            LOG_INFO(log, "Preprocessing {} log entries", log_entries->size());
+            auto idx = state_machine->last_commit_index() + 1;
+            for (const auto & entry : *log_entries)
+            {
+                if (entry && entry->get_val_type() == nuraft::log_val_type::app_log)
+                    state_machine->pre_commit(idx, entry->get_buf());
+
+                ++idx;
+                ++preprocessed;
+
+                if (preprocessed % 50000 == 0)
+                    LOG_TRACE(log, "Preprocessed {}/{} entries", preprocessed, log_entries->size());
+            }
+            LOG_INFO(log, "Preprocessing done");
         };
 
         switch (type)
@@ -666,43 +665,34 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
                 /// until we preprocess all stored logs
                 return nuraft::cb_func::ReturnCode::ReturnNull;
             }
-            case nuraft::cb_func::InitialBatchCommited:
-            {
-                preprocess_logs();
-                break;
-            }
             case nuraft::cb_func::GotAppendEntryReqFromLeader:
             {
-                auto & req = *static_cast<nuraft::req_msg *>(param->ctx);
-
-                if (req.get_commit_idx() == 0 || req.log_entries().empty())
-                    break;
-
-                auto last_committed_index = state_machine->last_commit_index();
-                // Actual log number.
-                auto index_to_commit = std::min({last_log_idx_on_disk, req.get_last_log_idx(), req.get_commit_idx()});
-
-                if (index_to_commit > last_committed_index)
-                {
-                    LOG_TRACE(log, "Trying to commit local log entries, committing upto {}", index_to_commit);
-                    raft_instance->commitLogs(index_to_commit, true);
-                    /// after we manually committed all the local logs we can, we assert that all of the local logs are either
-                    /// committed or preprocessed
-                    if (!keeper_context->local_logs_preprocessed)
-                        throw Exception(ErrorCodes::LOGICAL_ERROR, "Local logs are not preprocessed");
-                }
-                else if (last_log_idx_on_disk <= last_committed_index)
-                {
-                    keeper_context->local_logs_preprocessed = true;
-                }
-                else if
-                (
-                    index_to_commit == 0 ||
-                    (index_to_commit == last_committed_index && last_log_idx_on_disk > index_to_commit)  /// we need to rollback all the logs so we preprocess all of them
-                )
+                /// maybe we got snapshot installed
+                if (state_machine->last_commit_index() >= last_log_idx_on_disk)
                 {
                     preprocess_logs();
+                    break;
                 }
+
+                auto & req = *static_cast<nuraft::req_msg *>(param->ctx);
+
+                if (req.log_entries().empty())
+                    break;
+
+                if (req.get_last_log_idx() < last_log_idx_on_disk)
+                    last_log_idx_on_disk = req.get_last_log_idx();
+                /// We don't want to accept too many new logs before all the local logs are preprocessed.
+                /// Because the next log index is decreased on each failure, we still need to accept requests
+                /// that are near last_log_idx_on_disk, so that the counter is reset on the leader side.
+                else if (raft_instance->get_target_committed_log_idx() >= last_log_idx_on_disk && req.get_last_log_idx() > last_log_idx_on_disk)
+                    return nuraft::cb_func::ReturnNull;
+
+                break;
+            }
+            case nuraft::cb_func::StateMachineExecution:
+            {
+                if (state_machine->last_commit_index() >= last_log_idx_on_disk)
+                    preprocess_logs();
                 break;
             }
             default:

From 13749f550f85a9222c41fabe1ede19b8347f6380 Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Thu, 4 Jan 2024 07:59:44 +0000
Subject: [PATCH 141/204] Lower log levels for some Raft logs

---
 contrib/NuRaft                   | 2 +-
 src/Coordination/LoggerWrapper.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/contrib/NuRaft b/contrib/NuRaft
index b7ea89b817a..636e83c33b2 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04
+Subproject commit 636e83c33b2243d88935d8bf78022f225f315154
diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
index ae3ff1553b0..d092a8d4440 100644
--- a/src/Coordination/LoggerWrapper.h
+++ b/src/Coordination/LoggerWrapper.h
@@ -13,6 +13,7 @@ private:
 
     static inline const std::unordered_map<LogsLevel, Poco::Message::Priority> LEVELS =
     {
+        {LogsLevel::test, Poco::Message::Priority::PRIO_TEST},
         {LogsLevel::trace, Poco::Message::Priority::PRIO_TRACE},
         {LogsLevel::debug, Poco::Message::Priority::PRIO_DEBUG},
         {LogsLevel::information, Poco::Message::PRIO_INFORMATION},

From 3c7ae2f171bb8bf56d04677448a6ab0384f865a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:20:07 +0000
Subject: [PATCH 142/204] Reduce bounding_ratio.xml

---
 tests/performance/bounding_ratio.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/bounding_ratio.xml b/tests/performance/bounding_ratio.xml
index e3a15f90013..ed0b25848df 100644
--- a/tests/performance/bounding_ratio.xml
+++ b/tests/performance/bounding_ratio.xml
@@ -1,4 +1,4 @@
 <test>
-    <query>SELECT boundingRatio(number, number) FROM numbers(100000000)</query>
-    <query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(100000000)</query>
+    <query>SELECT boundingRatio(number, number) FROM numbers(30000000)</query>
+    <query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(30000000)</query>
 </test>

From 39eaa8dc9cd599b337a091dafa8cd3bb020e1b47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:24:36 +0000
Subject: [PATCH 143/204] Halve the size of reinterpret_as.xml

---
 tests/performance/reinterpret_as.xml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
index dbf6df160ed..d05ef3bb038 100644
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@@ -19,7 +19,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -38,7 +38,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -57,7 +57,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -76,7 +76,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -95,7 +95,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(10000000)
+        FROM numbers_mt(5000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -115,7 +115,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -134,7 +134,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -153,7 +153,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -172,7 +172,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -191,7 +191,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(100000000)
+        FROM numbers_mt(50000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -210,7 +210,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(10000000)
+        FROM numbers_mt(5000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -230,7 +230,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(10000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -249,7 +249,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(100000000)
+        FROM numbers_mt(50000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>

From 2aa6690f2c63c4630c04b6cae54e0fdbb8b12082 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:29:17 +0000
Subject: [PATCH 144/204] Reduce hashed_dictionary.xml

---
 tests/performance/hashed_dictionary.xml | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml
index e9038e694c6..b9de02a70e0 100644
--- a/tests/performance/hashed_dictionary.xml
+++ b/tests/performance/hashed_dictionary.xml
@@ -82,7 +82,6 @@
             <name>elements_count</name>
             <values>
                 <value>5000000</value>
-                <value>7500000</value>
             </values>
         </substitution>
     </substitutions>
@@ -90,16 +89,14 @@
     <query>
         WITH rand64() % toUInt64({elements_count}) as key
         SELECT dictGet('default.simple_key_hashed_dictionary', {column_name}, key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 
     <query>
         WITH rand64() % toUInt64({elements_count}) as key
         SELECT dictHas('default.simple_key_hashed_dictionary', key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 
@@ -111,16 +108,14 @@
     <query>
         WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
         SELECT dictGet('default.complex_key_hashed_dictionary', {column_name}, key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 
     <query>
         WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
         SELECT dictHas('default.complex_key_hashed_dictionary', key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 

From 1d1edd5b57b6f6cf188c6c616d09b374a9144268 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:31:20 +0000
Subject: [PATCH 145/204] Reduce sum_map.xml

---
 tests/performance/sum_map.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml
index f55af077023..ffb9b9507ae 100644
--- a/tests/performance/sum_map.xml
+++ b/tests/performance/sum_map.xml
@@ -7,7 +7,7 @@
         <substitution>
            <name>scale</name>
            <values>
-               <value>1000000</value>
+               <value>100000</value>
            </values>
        </substitution>
         <substitution>

From 641caba5b0d1caf6a4146c769ee3af6b55bd8899 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:36:33 +0000
Subject: [PATCH 146/204] Adapt more tests

---
 tests/performance/group_by_fixed_keys.xml | 2 +-
 tests/performance/join_used_flags.xml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/group_by_fixed_keys.xml b/tests/performance/group_by_fixed_keys.xml
index a64208eb3de..d74b65ad47a 100644
--- a/tests/performance/group_by_fixed_keys.xml
+++ b/tests/performance/group_by_fixed_keys.xml
@@ -11,7 +11,7 @@
 
     <create_query>create table group_by_fk(a UInt32, b UInt32, c LowCardinality(UInt32), d Nullable(UInt32), e UInt64, f UInt64, g UInt64, h LowCardinality(UInt64), i Nullable(UInt64)) engine=MergeTree order by tuple()</create_query>
 
-    <fill_query>insert into group_by_fk select number, number, number % 10000, number % 2 == 0 ? number : Null, number, number, number, number % 10000, number % 2 == 0 ? number : Null from numbers_mt(3e7)</fill_query>
+    <fill_query>insert into group_by_fk select number, number, number % 10000, number % 2 == 0 ? number : Null, number, number, number, number % 10000, number % 2 == 0 ? number : Null from numbers_mt(1e7) settings max_insert_threads=8</fill_query>
 
     <!-- keys64_two_level -->
     <query>select a, b from group_by_fk group by a, b format Null</query>
diff --git a/tests/performance/join_used_flags.xml b/tests/performance/join_used_flags.xml
index 70b0b45391d..1bb994f7be2 100644
--- a/tests/performance/join_used_flags.xml
+++ b/tests/performance/join_used_flags.xml
@@ -1,6 +1,6 @@
 <test>
     <create_query>CREATE TABLE test_join_used_flags (i64 Int64, i32 Int32) ENGINE = Memory</create_query>
-    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers_mt(3000000)</fill_query>
+    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers_mt(1500000)</fill_query>
     <query>SELECT l.i64, r.i64, l.i32, r.i32 FROM test_join_used_flags l RIGHT JOIN test_join_used_flags r USING i64 format Null</query>
     <drop_query>DROP TABLE IF EXISTS test_join_used_flags</drop_query>
 </test>

From b5997e6a9639f54698cf1dda354625a5f20bb776 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 4 Jan 2024 15:06:38 +0300
Subject: [PATCH 147/204] MergeTreePrefetchedReadPool disable for LIMIT only
 queries

---
 src/Interpreters/InterpreterSelectQuery.cpp    | 7 ++++++-
 src/Planner/PlannerJoinTree.cpp                | 7 ++++++-
 src/Processors/QueryPlan/ReadFromMergeTree.cpp | 8 +++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index cdf1b4228bc..d3d7470ad25 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2501,7 +2501,12 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
             max_block_size = std::max<UInt64>(1, max_block_limited);
             max_threads_execute_query = max_streams = 1;
         }
-        if (max_block_limited < local_limits.local_limits.size_limits.max_rows)
+        if (local_limits.local_limits.size_limits.max_rows != 0 &&
+            max_block_limited < local_limits.local_limits.size_limits.max_rows)
+        {
+            query_info.limit = max_block_limited;
+        }
+        else
         {
             query_info.limit = max_block_limited;
         }
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index e2cdf146a69..095db09ffbd 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -645,7 +645,12 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
                     max_threads_execute_query = 1;
                 }
 
-                if (max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0 &&
+                    max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                {
+                    table_expression_query_info.limit = max_block_size_limited;
+                }
+                else
                 {
                     table_expression_query_info.limit = max_block_size_limited;
                 }
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index aa1c463e4e6..bdb2f7ea009 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -418,7 +418,13 @@ Pipe ReadFromMergeTree::readFromPool(
         && settings.allow_prefetched_read_pool_for_local_filesystem
         && MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.local_fs_method);
 
-    if (allow_prefetched_remote || allow_prefetched_local)
+    /** Do not use prefetched read pool if query is trivial limit query.
+      * Because time spend during filling per thread tasks can be greater than whole query
+      * execution for big tables with small limit.
+      */
+    bool use_prefetched_read_pool = query_info.limit != 0 && (allow_prefetched_remote || allow_prefetched_local);
+
+    if (use_prefetched_read_pool)
     {
         pool = std::make_shared<MergeTreePrefetchedReadPool>(
             std::move(parts_with_range),

From 8573c66b09d3879d65069d5b50713ad0714238b5 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 4 Jan 2024 15:29:25 +0300
Subject: [PATCH 148/204] Fixed code review issues

---
 src/Interpreters/InterpreterSelectQuery.cpp | 6 +++---
 src/Planner/PlannerJoinTree.cpp             | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index d3d7470ad25..b6c9b8cdba3 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2501,10 +2501,10 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
             max_block_size = std::max<UInt64>(1, max_block_limited);
             max_threads_execute_query = max_streams = 1;
         }
-        if (local_limits.local_limits.size_limits.max_rows != 0 &&
-            max_block_limited < local_limits.local_limits.size_limits.max_rows)
+        if (local_limits.local_limits.size_limits.max_rows != 0)
         {
-            query_info.limit = max_block_limited;
+            if (max_block_limited < local_limits.local_limits.size_limits.max_rows)
+                query_info.limit = max_block_limited;
         }
         else
         {
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 095db09ffbd..857fb993600 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -645,10 +645,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
                     max_threads_execute_query = 1;
                 }
 
-                if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0 &&
-                    max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0)
                 {
-                    table_expression_query_info.limit = max_block_size_limited;
+                    if (max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                        table_expression_query_info.limit = max_block_size_limited;
                 }
                 else
                 {

From 74fb390444baec49360f5e07a34b32f63684218c Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Thu, 4 Jan 2024 13:36:42 +0100
Subject: [PATCH 149/204] fix build

---
 src/Interpreters/DDLTask.cpp | 2 +-
 src/Interpreters/DDLTask.h   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 85bf6fec655..d418be51cc5 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -232,7 +232,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
             throw Exception(
                 ErrorCodes::DNS_ERROR,
                 "{} is not a local address. Check parameter 'host_name' in the configuration",
-                *config_host_name)
+                *config_host_name);
     }
 
     for (const HostID & host : entry.hosts)
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index e1a81ac97af..bc45b46bf0f 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -44,6 +44,9 @@ struct HostID
     explicit HostID(const Cluster::Address & address)
         : host_name(address.host_name), port(address.port) {}
 
+    HostID(const String & host_name_, UInt16 port_)
+        : host_name(host_name_), port(port_) {}
+
     static HostID fromString(const String & host_port_str);
 
     String toString() const

From 82d3d570530ebd014717ba0e11bfd975fe2502e7 Mon Sep 17 00:00:00 2001
From: Max Kainov <max.kainov@clickhouse.com>
Date: Thu, 4 Jan 2024 12:45:17 +0000
Subject: [PATCH 150/204] Sync content of the docker test images

---
 docker/test/stateless/stress_tests.lib | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib
index 8f89c1b80dd..6f0dabb5207 100644
--- a/docker/test/stateless/stress_tests.lib
+++ b/docker/test/stateless/stress_tests.lib
@@ -236,6 +236,10 @@ function check_logs_for_critical_errors()
         && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
         || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
 
+    rg -Fa "it is lost forever" /var/log/clickhouse-server/clickhouse-server*.log | grep 'SharedMergeTreePartCheckThread' > /dev/null \
+        && echo -e "Lost forever for SharedMergeTree$FAIL" >> /test_output/test_results.tsv \
+        || echo -e "No SharedMergeTree lost forever in clickhouse-server.log$OK" >> /test_output/test_results.tsv
+
     # Remove file no_such_key_errors.txt if it's empty
     [ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
 

From 5bfddfebb6ac1f50ebbdca5d0e146f72fe085793 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 14:08:58 +0000
Subject: [PATCH 151/204] Fix instantiation detection

---
 src/AggregateFunctions/AggregateFunctionMax.cpp     | 5 +++--
 src/AggregateFunctions/AggregateFunctionMin.cpp     | 5 +++--
 src/AggregateFunctions/AggregateFunctionMinMaxAny.h | 2 ++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index 2577c932592..e9cd651b8db 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -1,6 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
+#include <Common/Concepts.h>
 #include <Common/findExtreme.h>
 
 namespace DB
@@ -74,7 +75,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
@@ -169,7 +170,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 701101e7207..d767bd5c563 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -1,6 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
+#include <Common/Concepts.h>
 #include <Common/findExtreme.h>
 
 
@@ -75,7 +76,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
@@ -170,7 +171,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
index b69a0b100a3..dec70861543 100644
--- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
@@ -965,6 +965,7 @@ template <typename Data>
 struct AggregateFunctionMinData : Data
 {
     using Self = AggregateFunctionMinData;
+    using Impl = Data;
 
     bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena)     { return this->changeIfLess(column, row_num, arena); }
     bool changeIfBetter(const Self & to, Arena * arena)                            { return this->changeIfLess(to, arena); }
@@ -993,6 +994,7 @@ template <typename Data>
 struct AggregateFunctionMaxData : Data
 {
     using Self = AggregateFunctionMaxData;
+    using Impl = Data;
 
     bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena)     { return this->changeIfGreater(column, row_num, arena); }
     bool changeIfBetter(const Self & to, Arena * arena)                            { return this->changeIfGreater(to, arena); }

From d9f68f4a2c4e3fcdce8776af5d9ee2cf7a551f15 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 4 Jan 2024 17:16:47 +0300
Subject: [PATCH 152/204] Fixed tests

---
 src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index bdb2f7ea009..6f0429459cd 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -422,7 +422,7 @@ Pipe ReadFromMergeTree::readFromPool(
       * Because time spend during filling per thread tasks can be greater than whole query
       * execution for big tables with small limit.
       */
-    bool use_prefetched_read_pool = query_info.limit != 0 && (allow_prefetched_remote || allow_prefetched_local);
+    bool use_prefetched_read_pool = query_info.limit == 0 && (allow_prefetched_remote || allow_prefetched_local);
 
     if (use_prefetched_read_pool)
     {

From 494a32f4e47af2576455cda2794ffa13568c60f3 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 4 Jan 2024 14:41:04 +0000
Subject: [PATCH 153/204] Review fixes

---
 src/Storages/StorageS3.cpp | 105 ++++++++-----------------------------
 1 file changed, 23 insertions(+), 82 deletions(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index ce49be32120..d7cc86ed321 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -147,7 +147,8 @@ public:
         const Names & column_names_,
         StorageSnapshotPtr storage_snapshot_,
         StorageS3 & storage_,
-        SelectQueryInfo query_info_,
+        ReadFromFormatInfo read_from_format_info_,
+        bool need_only_count_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
@@ -155,7 +156,8 @@ public:
         , column_names(column_names_)
         , storage_snapshot(std::move(storage_snapshot_))
         , storage(storage_)
-        , query_info(std::move(query_info_))
+        , read_from_format_info(std::move(read_from_format_info_))
+        , need_only_count(need_only_count_)
         , local_context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -168,7 +170,8 @@ private:
     Names column_names;
     StorageSnapshotPtr storage_snapshot;
     StorageS3 & storage;
-    SelectQueryInfo query_info;
+    ReadFromFormatInfo read_from_format_info;
+    bool need_only_count;
     StorageS3::Configuration query_configuration;
     NamesAndTypesList virtual_columns;
 
@@ -183,77 +186,6 @@ private:
 };
 
 
-static Block getBlockWithVirtuals(const NamesAndTypesList & virtual_columns, const String & bucket, const std::unordered_set<String> & keys)
-{
-    Block virtual_columns_block;
-    fs::path bucket_path(bucket);
-
-    for (const auto & [column_name, column_type] : virtual_columns)
-    {
-        if (column_name == "_path")
-        {
-            auto column = column_type->createColumn();
-            for (const auto & key : keys)
-                column->insert((bucket_path / key).string());
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-        else if (column_name == "_file")
-        {
-            auto column = column_type->createColumn();
-            for (const auto & key : keys)
-            {
-                auto pos = key.find_last_of('/');
-                if (pos != std::string::npos)
-                    column->insert(key.substr(pos + 1));
-                else
-                    column->insert(key);
-            }
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-        else if (column_name == "_key")
-        {
-            auto column = column_type->createColumn();
-            for (const auto & key : keys)
-                column->insert(key);
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-        else
-        {
-            auto column = column_type->createColumn();
-            column->insertManyDefaults(keys.size());
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-    }
-
-    /// Column _key is mandatory and may not be in virtual_columns list
-    if (!virtual_columns_block.has("_key"))
-    {
-        auto column_type = std::make_shared<DataTypeString>();
-        auto column = column_type->createColumn(); for (const auto & key : keys)
-            column->insert(key);
-        virtual_columns_block.insert({std::move(column), column_type, "_key"});
-    }
-
-    return virtual_columns_block;
-}
-
-static std::vector<String> filterKeysForPartitionPruning(
-    const std::vector<String> & keys,
-    const String & bucket,
-    const NamesAndTypesList & virtual_columns,
-    const ActionsDAG::Node * predicate,
-    ContextPtr context)
-{
-    std::unordered_set<String> result_keys(keys.begin(), keys.end());
-
-    auto block = getBlockWithVirtuals(virtual_columns, bucket, result_keys);
-    VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context);
-    result_keys = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_key");
-
-    LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Applied partition pruning {} from {} keys left", result_keys.size(), keys.size());
-    return std::vector<String>(result_keys.begin(), result_keys.end());
-}
-
 class IOutputFormat;
 using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
 
@@ -305,9 +237,9 @@ public:
                 "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error());
 
         recursive = globbed_uri.key == "/**" ? true : false;
-        fillInternalBufferAssumeLocked();
 
         filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        fillInternalBufferAssumeLocked();
     }
 
     KeyWithInfoPtr next()
@@ -1161,7 +1093,17 @@ static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
     }
     else
     {
-        Strings keys = filterKeysForPartitionPruning(configuration.keys, configuration.url.bucket, virtual_columns, predicate, local_context);
+        Strings keys = configuration.keys;
+        auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(keys.size());
+            for (const auto & key : keys)
+                paths.push_back(fs::path(configuration.url.bucket) / key);
+            VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context);
+        }
+
         return std::make_shared<StorageS3Source::KeysIterator>(
             *configuration.client, configuration.url.version_id, keys,
             configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback);
@@ -1195,12 +1137,16 @@ void StorageS3::read(
 {
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), virtual_columns);
 
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && local_context->getSettingsRef().optimize_count_from_files;
+
     auto reading = std::make_unique<ReadFromStorageS3Step>(
         read_from_format_info.source_header,
         column_names,
         storage_snapshot,
         *this,
-        query_info,
+        std::move(read_from_format_info),
+        need_only_count,
         local_context,
         max_block_size,
         num_streams);
@@ -1235,8 +1181,6 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
 
     createIterator(nullptr);
 
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, storage.supportsSubsetOfColumns(local_context), virtual_columns);
-
     size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
     if (estimated_keys_count > 1)
         num_streams = std::min(num_streams, estimated_keys_count);
@@ -1244,9 +1188,6 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
         /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case.
         num_streams = 1;
 
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && local_context->getSettingsRef().optimize_count_from_files;
-
     const size_t max_threads = local_context->getSettingsRef().max_threads;
     const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul));
     LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads);

From d7883eeaf4fdba3aa14b9486742d602d5267d2a8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 4 Jan 2024 15:15:28 +0000
Subject: [PATCH 154/204] Fix some tests.

---
 .../QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index efb75f74415..d1f0c1ebe5e 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -480,7 +480,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
             auto block = reading.getMergeTreeData().getMinMaxCountProjectionBlock(
                 metadata,
                 candidate.dag->getRequiredColumnsNames(),
-                dag.dag,
+                (dag.filter_node ? dag.dag : nullptr),
                 parts,
                 max_added_blocks.get(),
                 context);

From 39b15f91303483ca3f9f5efcaab6cea6236b7d46 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <mrnovikd@gmail.com>
Date: Thu, 4 Jan 2024 16:33:52 +0100
Subject: [PATCH 155/204] Add a comment

---
 src/Planner/CollectTableExpressionData.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 38c986fd31f..78a7c7074c3 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -46,9 +46,12 @@ public:
             for (auto & using_element : using_list)
             {
                 auto & column_node = using_element->as<ColumnNode&>();
+                /// This list contains column nodes from left and right tables.
                 auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as<ListNode&>().getNodes();
 
+                /// Visit left table column node.
                 visitUsingColumn(columns_from_subtrees[0]);
+                /// Visit right table column node.
                 visitUsingColumn(columns_from_subtrees[1]);
             }
             return;

From 296e1ac8aa000996f004343aa299e5b732c7c8df Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:11:39 +0000
Subject: [PATCH 156/204] FunctionSqid.cpp --> sqid.cpp

---
 src/Functions/{FunctionSqid.cpp => sqid.cpp} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/Functions/{FunctionSqid.cpp => sqid.cpp} (100%)

diff --git a/src/Functions/FunctionSqid.cpp b/src/Functions/sqid.cpp
similarity index 100%
rename from src/Functions/FunctionSqid.cpp
rename to src/Functions/sqid.cpp

From 03e344c36ae27b62cfcf058640b81b8ae8460afe Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:15:06 +0000
Subject: [PATCH 157/204] Fix preprocessor guard

---
 src/Functions/sqid.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp
index 546263914c2..4517bba963e 100644
--- a/src/Functions/sqid.cpp
+++ b/src/Functions/sqid.cpp
@@ -1,6 +1,6 @@
 #include "config.h"
 
-#ifdef ENABLE_SQIDS
+#if USE_SQIDS
 
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnsNumber.h>

From 98d602c3d5e3e197ed9d3579ad34155b386acb74 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:21:08 +0000
Subject: [PATCH 158/204] Reserve enough space in result column upfront

---
 src/Functions/sqid.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp
index 4517bba963e..abd9d22f4c5 100644
--- a/src/Functions/sqid.cpp
+++ b/src/Functions/sqid.cpp
@@ -57,9 +57,10 @@ public:
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
-        size_t num_args = arguments.size();
         auto col_res = ColumnString::create();
+        col_res->reserve(input_rows_count);
 
+        const size_t num_args = arguments.size();
         std::vector<UInt64> numbers(num_args);
         for (size_t i = 0; i < input_rows_count; ++i)
         {

From 52058211e7ff227feb9c890f641d2299af9a246c Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Thu, 4 Jan 2024 08:21:46 -0800
Subject: [PATCH 159/204] Fix some thread pool settings not updating at runtime
 (#58485)

---
 programs/server/Server.cpp | 94 +++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 926e57070f3..1fa3d1cfa73 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1260,11 +1260,11 @@ try
         {
             Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
 
-            ServerSettings server_settings_;
-            server_settings_.loadSettingsFromConfig(*config);
+            ServerSettings new_server_settings;
+            new_server_settings.loadSettingsFromConfig(*config);
 
-            size_t max_server_memory_usage = server_settings_.max_server_memory_usage;
-            double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio;
+            size_t max_server_memory_usage = new_server_settings.max_server_memory_usage;
+            double max_server_memory_usage_to_ram_ratio = new_server_settings.max_server_memory_usage_to_ram_ratio;
 
             size_t current_physical_server_memory = getMemoryAmount(); /// With cgroups, the amount of memory available to the server can be changed dynamically.
             size_t default_max_server_memory_usage = static_cast<size_t>(current_physical_server_memory * max_server_memory_usage_to_ram_ratio);
@@ -1294,9 +1294,9 @@ try
             total_memory_tracker.setDescription("(total)");
             total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
 
-            size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit;
+            size_t merges_mutations_memory_usage_soft_limit = new_server_settings.merges_mutations_memory_usage_soft_limit;
 
-            size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * server_settings_.merges_mutations_memory_usage_to_ram_ratio);
+            size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
             if (merges_mutations_memory_usage_soft_limit == 0)
             {
                 merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage;
@@ -1304,7 +1304,7 @@ try
                     " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
                     formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
                     formatReadableSizeWithBinarySuffix(current_physical_server_memory),
-                    server_settings_.merges_mutations_memory_usage_to_ram_ratio);
+                    new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
             }
             else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage)
             {
@@ -1313,7 +1313,7 @@ try
                     " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
                     formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
                     formatReadableSizeWithBinarySuffix(current_physical_server_memory),
-                    server_settings_.merges_mutations_memory_usage_to_ram_ratio);
+                    new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
             }
 
             LOG_INFO(log, "Merges and mutations memory limit is set to {}",
@@ -1322,7 +1322,7 @@ try
             background_memory_tracker.setDescription("(background)");
             background_memory_tracker.setMetric(CurrentMetrics::MergesMutationsMemoryTracking);
 
-            total_memory_tracker.setAllowUseJemallocMemory(server_settings_.allow_use_jemalloc_memory);
+            total_memory_tracker.setAllowUseJemallocMemory(new_server_settings.allow_use_jemalloc_memory);
 
             auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
             total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
@@ -1346,26 +1346,26 @@ try
             global_context->setRemoteHostFilter(*config);
             global_context->setHTTPHeaderFilter(*config);
 
-            global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
-            global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
-            global_context->setMaxTableNumToWarn(server_settings_.max_table_num_to_warn);
-            global_context->setMaxDatabaseNumToWarn(server_settings_.max_database_num_to_warn);
-            global_context->setMaxPartNumToWarn(server_settings_.max_part_num_to_warn);
+            global_context->setMaxTableSizeToDrop(new_server_settings.max_table_size_to_drop);
+            global_context->setMaxPartitionSizeToDrop(new_server_settings.max_partition_size_to_drop);
+            global_context->setMaxTableNumToWarn(new_server_settings.max_table_num_to_warn);
+            global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn);
+            global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn);
 
             ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
-            if (server_settings_.concurrent_threads_soft_limit_num > 0 && server_settings_.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
-                concurrent_threads_soft_limit = server_settings_.concurrent_threads_soft_limit_num;
-            if (server_settings_.concurrent_threads_soft_limit_ratio_to_cores > 0)
+            if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
+                concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num;
+            if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
             {
-                auto value = server_settings_.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
+                auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
                 if (value > 0 && value < concurrent_threads_soft_limit)
                     concurrent_threads_soft_limit = value;
             }
             ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
 
-            global_context->getProcessList().setMaxSize(server_settings_.max_concurrent_queries);
-            global_context->getProcessList().setMaxInsertQueriesAmount(server_settings_.max_concurrent_insert_queries);
-            global_context->getProcessList().setMaxSelectQueriesAmount(server_settings_.max_concurrent_select_queries);
+            global_context->getProcessList().setMaxSize(new_server_settings.max_concurrent_queries);
+            global_context->getProcessList().setMaxInsertQueriesAmount(new_server_settings.max_concurrent_insert_queries);
+            global_context->getProcessList().setMaxSelectQueriesAmount(new_server_settings.max_concurrent_select_queries);
 
             if (config->has("keeper_server"))
                 global_context->updateKeeperConfiguration(*config);
@@ -1376,68 +1376,68 @@ try
             /// This is done for backward compatibility.
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_pool_size;
-                auto new_ratio = server_settings_.background_merges_mutations_concurrency_ratio;
+                auto new_pool_size = new_server_settings.background_pool_size;
+                auto new_ratio = new_server_settings.background_merges_mutations_concurrency_ratio;
                 global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, static_cast<size_t>(new_pool_size * new_ratio));
-                global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings_.background_merges_mutations_scheduling_policy.toString());
+                global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_server_settings.background_merges_mutations_scheduling_policy.toString());
             }
 
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_move_pool_size;
+                auto new_pool_size = new_server_settings.background_move_pool_size;
                 global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
             }
 
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_fetches_pool_size;
+                auto new_pool_size = new_server_settings.background_fetches_pool_size;
                 global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
             }
 
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_common_pool_size;
+                auto new_pool_size = new_server_settings.background_common_pool_size;
                 global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
             }
 
-            global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings_.background_buffer_flush_schedule_pool_size);
-            global_context->getSchedulePool().increaseThreadsCount(server_settings_.background_schedule_pool_size);
-            global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size);
-            global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size);
+            global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_server_settings.background_buffer_flush_schedule_pool_size);
+            global_context->getSchedulePool().increaseThreadsCount(new_server_settings.background_schedule_pool_size);
+            global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_server_settings.background_message_broker_schedule_pool_size);
+            global_context->getDistributedSchedulePool().increaseThreadsCount(new_server_settings.background_distributed_schedule_pool_size);
 
-            global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, server_settings_.tables_loader_foreground_pool_size);
-            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, server_settings_.tables_loader_background_pool_size);
-            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, server_settings_.tables_loader_background_pool_size);
+            global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, new_server_settings.tables_loader_foreground_pool_size);
+            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, new_server_settings.tables_loader_background_pool_size);
+            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, new_server_settings.tables_loader_background_pool_size);
 
             getIOThreadPool().reloadConfiguration(
-                server_settings.max_io_thread_pool_size,
-                server_settings.max_io_thread_pool_free_size,
-                server_settings.io_thread_pool_queue_size);
+                new_server_settings.max_io_thread_pool_size,
+                new_server_settings.max_io_thread_pool_free_size,
+                new_server_settings.io_thread_pool_queue_size);
 
             getBackupsIOThreadPool().reloadConfiguration(
-                server_settings.max_backups_io_thread_pool_size,
-                server_settings.max_backups_io_thread_pool_free_size,
-                server_settings.backups_io_thread_pool_queue_size);
+                new_server_settings.max_backups_io_thread_pool_size,
+                new_server_settings.max_backups_io_thread_pool_free_size,
+                new_server_settings.backups_io_thread_pool_queue_size);
 
             getActivePartsLoadingThreadPool().reloadConfiguration(
-                server_settings.max_active_parts_loading_thread_pool_size,
+                new_server_settings.max_active_parts_loading_thread_pool_size,
                 0, // We don't need any threads once all the parts will be loaded
-                server_settings.max_active_parts_loading_thread_pool_size);
+                new_server_settings.max_active_parts_loading_thread_pool_size);
 
             getOutdatedPartsLoadingThreadPool().reloadConfiguration(
-                server_settings.max_outdated_parts_loading_thread_pool_size,
+                new_server_settings.max_outdated_parts_loading_thread_pool_size,
                 0, // We don't need any threads once all the parts will be loaded
-                server_settings.max_outdated_parts_loading_thread_pool_size);
+                new_server_settings.max_outdated_parts_loading_thread_pool_size);
 
             /// It could grow if we need to synchronously wait until all the data parts will be loaded.
             getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(
-                server_settings.max_active_parts_loading_thread_pool_size
+                new_server_settings.max_active_parts_loading_thread_pool_size
             );
 
             getPartsCleaningThreadPool().reloadConfiguration(
-                server_settings.max_parts_cleaning_thread_pool_size,
+                new_server_settings.max_parts_cleaning_thread_pool_size,
                 0, // We don't need any threads one all the parts will be deleted
-                server_settings.max_parts_cleaning_thread_pool_size);
+                new_server_settings.max_parts_cleaning_thread_pool_size);
 
             if (config->has("resources"))
             {

From 76b7cddb186ba6d44e581fa35dce9fd48fc6b3ed Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:29:43 +0000
Subject: [PATCH 160/204] Update docs

---
 docs/en/sql-reference/functions/hash-functions.md | 4 +++-
 src/Functions/sqid.cpp                            | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index a23849c13aa..2c6a468af0e 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -1779,7 +1779,9 @@ Result:
 
 ## sqid
 
-Transforms numbers into YouTube-like short URL hash called [Sqid](https://sqids.org/).
+Transforms numbers into a [Sqid](https://sqids.org/) which is a YouTube-like ID string.
+The output alphabet is `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`.
+Do not use this function for hashing - the generated IDs can be decoded back into numbers.
 
 **Syntax**
 
diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp
index abd9d22f4c5..363a3f8ac13 100644
--- a/src/Functions/sqid.cpp
+++ b/src/Functions/sqid.cpp
@@ -84,7 +84,7 @@ REGISTER_FUNCTION(Sqid)
 {
     factory.registerFunction<FunctionSqid>(FunctionDocumentation{
         .description=R"(
-Transforms numbers into YouTube-like short URL hash called [Sqid](https://sqids.org/).)",
+Transforms numbers into a [Sqid](https://sqids.org/) which is a YouTube-like ID string.)",
         .syntax="sqid(number1, ...)",
         .arguments={{"number1, ...", "Arbitrarily many UInt8, UInt16, UInt32 or UInt64 arguments"}},
         .returned_value="A hash id [String](/docs/en/sql-reference/data-types/string.md).",

From 9f5015737bcb9fdb3a0d0d8056ca05dfc0c1302a Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Wed, 3 Jan 2024 23:46:13 +0100
Subject: [PATCH 161/204] fix a stupid case of intersecting parts

---
 src/Storages/MergeTree/MergeTreeData.cpp         | 14 +++++++++-----
 ...02486_truncate_and_unexpected_parts.reference |  2 ++
 .../02486_truncate_and_unexpected_parts.sql      | 16 ++++++++++++++++
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 1c80778f1ca..a23d59055ca 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3985,8 +3985,15 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW
     /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice
     removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock);
 
+    /// We can only create a covering part for a blocks range that starts with 0 (otherwise we may get "intersecting parts"
+    /// if we remove a range from the middle when dropping a part).
+    /// Maybe we could do it by incrementing mutation version to get a name for the empty covering part,
+    /// but it's okay to simply avoid creating it for DROP PART (for a part in the middle).
+    /// NOTE: Block numbers in ReplicatedMergeTree start from 0. For MergeTree, is_new_syntax is always false.
+    assert(!create_empty_part || supportsReplication());
+    bool range_in_the_middle = drop_range.min_block;
     bool is_new_syntax = format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING;
-    if (create_empty_part && !parts_to_remove.empty() && is_new_syntax)
+    if (create_empty_part && !parts_to_remove.empty() && is_new_syntax && !range_in_the_middle)
     {
         /// We are going to remove a lot of parts from zookeeper just after returning from this function.
         /// And we will remove parts from disk later (because some queries may use them).
@@ -3995,12 +4002,9 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW
         /// We don't need to commit it to zk, and don't even need to activate it.
 
         MergeTreePartInfo empty_info = drop_range;
-        empty_info.level = empty_info.mutation = 0;
-        if (!empty_info.min_block)
-            empty_info.min_block = MergeTreePartInfo::MAX_BLOCK_NUMBER;
+        empty_info.min_block = empty_info.level = empty_info.mutation = 0;
         for (const auto & part : parts_to_remove)
         {
-            empty_info.min_block = std::min(empty_info.min_block, part->info.min_block);
             empty_info.level = std::max(empty_info.level, part->info.level);
             empty_info.mutation = std::max(empty_info.mutation, part->info.mutation);
         }
diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
index 2ece1147d78..824d4bbec98 100644
--- a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
+++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
@@ -13,3 +13,5 @@
 5	rmt2
 7	rmt2
 9	rmt2
+1
+3
diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
index 52e8be236c8..755cba2a155 100644
--- a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
+++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
@@ -50,3 +50,19 @@ system sync replica rmt1;
 system sync replica rmt2;
 
 select *, _table from merge(currentDatabase(), '') order by _table, (*,);
+
+
+create table rmt3 (n int) engine=ReplicatedMergeTree('/test/02468/{database}3', '1') order by tuple() settings replicated_max_ratio_of_wrong_parts=0, max_suspicious_broken_parts=0, max_suspicious_broken_parts_bytes=0;
+set insert_keeper_fault_injection_probability=0;
+insert into rmt3 values (1);
+insert into rmt3 values (2);
+insert into rmt3 values (3);
+
+system stop cleanup rmt3;
+alter table rmt3 drop part 'all_1_1_0';
+optimize table rmt3 final;
+
+detach table rmt3 sync;
+attach table rmt3;
+
+select * from rmt3 order by n;

From 9149072520f979c7744f1c5222950f83de8365ff Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 17:27:26 +0000
Subject: [PATCH 162/204] Update tests

---
 ...f_indexes_support_match_function.reference | 12 +++
 ...ngrambf_indexes_support_match_function.sql | 98 ++++++++++++++++---
 2 files changed, 99 insertions(+), 11 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
index 5c6a213a03f..1cf1644fe0a 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
@@ -2,8 +2,12 @@
 2	Hello World
 1	Hello ClickHouse
 2	Hello World
+          Granules: 6/6
+          Granules: 2/6
             Granules: 6/6
             Granules: 2/6
+          Granules: 6/6
+          Granules: 2/6
             Granules: 6/6
             Granules: 2/6
 ---
@@ -13,14 +17,22 @@
 1	Hello ClickHouse
 2	Hello World
 6	World Champion
+          Granules: 6/6
+          Granules: 3/6
             Granules: 6/6
             Granules: 3/6
+          Granules: 6/6
+          Granules: 3/6
             Granules: 6/6
             Granules: 3/6
 ---
 5	OLAP Database
 5	OLAP Database
+          Granules: 6/6
+          Granules: 1/6
             Granules: 6/6
             Granules: 1/6
+          Granules: 6/6
+          Granules: 1/6
             Granules: 6/6
             Granules: 1/6
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
index df39be8abd6..49d39c601ef 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
@@ -1,4 +1,3 @@
-SET allow_experimental_analyzer = 1;
 DROP TABLE IF EXISTS tokenbf_tab;
 DROP TABLE IF EXISTS ngrambf_tab;
 
@@ -28,7 +27,7 @@ INSERT INTO ngrambf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3,
 SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
 SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
 
--- Skip 2/6 granules
+-- Read 2/6 granules
 -- Required string: 'Hello '
 -- Alternatives: 'Hello ClickHouse', 'Hello World'
 
@@ -39,7 +38,20 @@ FROM
     SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT *
 FROM
@@ -48,14 +60,28 @@ FROM
     SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
+
 
 SELECT '---';
 
 SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
 SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
 
--- Skip 3/6 granules
+-- Read 3/6 granules
 -- Required string: -
 -- Alternatives: 'ClickHouse', 'World'
 
@@ -66,7 +92,20 @@ FROM
     SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT *
 FROM
@@ -75,18 +114,30 @@ FROM
     SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT '---';
 
 SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
 SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
 
--- Skip 5/6 granules
+-- Read 1/6 granules
 -- Required string: 'OLAP'
 -- Alternatives: -
 
-set allow_experimental_analyzer = 1;
 SELECT *
 FROM
 (
@@ -94,7 +145,19 @@ FROM
     SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT *
 FROM
@@ -103,7 +166,20 @@ FROM
     SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 DROP TABLE tokenbf_tab;
 DROP TABLE ngrambf_tab;

From 85ea5c1f82dc85a7f44de74be84fb47e728fb5e5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 4 Jan 2024 17:46:06 +0000
Subject: [PATCH 163/204] Remove more code

---
 src/Interpreters/InterpreterSelectQuery.cpp   |  8 +-
 .../QueryPlan/ReadFromMergeTree.cpp           | 22 +++--
 src/Storages/MergeTree/KeyCondition.cpp       | 93 ++-----------------
 src/Storages/MergeTree/KeyCondition.h         | 25 -----
 src/Storages/MergeTree/MergeTreeData.cpp      | 11 ++-
 5 files changed, 32 insertions(+), 127 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index ca16c550257..4514147493b 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2382,17 +2382,19 @@ std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
         if (analysis_result.hasPrewhere())
         {
             auto & prewhere_info = analysis_result.prewhere_info;
-            filter_nodes.push_back(prewhere_info->prewhere_actions->tryFindInOutputs(prewhere_info->prewhere_column_name));
+            filter_nodes.push_back(&prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name));
 
             if (prewhere_info->row_level_filter)
-                filter_nodes.push_back(prewhere_info->row_level_filter->tryFindInOutputs(prewhere_info->row_level_column_name));
+                filter_nodes.push_back(&prewhere_info->row_level_filter->findInOutputs(prewhere_info->row_level_column_name));
         }
         if (analysis_result.hasWhere())
         {
-            filter_nodes.push_back(analysis_result.before_where->tryFindInOutputs(analysis_result.where_column_name));
+            filter_nodes.push_back(&analysis_result.before_where->findInOutputs(analysis_result.where_column_name));
         }
 
         auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes, {}, context);
+        if (!filter_actions_dag)
+            return {};
 
         return storage->totalRowsByPartitionPredicate(filter_actions_dag, context);
     }
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index b409e857d9a..62f25425385 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1363,14 +1363,18 @@ static void buildIndexes(
     if (!indexes->use_skip_indexes)
         return;
 
-    const SelectQueryInfo * info = &query_info;
     std::optional<SelectQueryInfo> info_copy;
-    if (settings.allow_experimental_analyzer)
+    auto get_query_info = [&]() -> const SelectQueryInfo &
     {
-        info_copy.emplace(query_info);
-        info_copy->filter_actions_dag = filter_actions_dag;
-        info = &*info_copy;
-    }
+        if (settings.allow_experimental_analyzer)
+        {
+            info_copy.emplace(query_info);
+            info_copy->filter_actions_dag = filter_actions_dag;
+            return *info_copy;
+        }
+
+        return query_info;
+    };
 
     std::unordered_set<std::string> ignored_index_names;
 
@@ -1411,7 +1415,7 @@ static void buildIndexes(
                 if (inserted)
                 {
                     skip_indexes.merged_indices.emplace_back();
-                    skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(*info, metadata_snapshot);
+                    skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(get_query_info(), metadata_snapshot);
                 }
 
                 skip_indexes.merged_indices[it->second].addIndex(index_helper);
@@ -1423,11 +1427,11 @@ static void buildIndexes(
                 {
 #ifdef ENABLE_ANNOY
                     if (const auto * annoy = typeid_cast<const MergeTreeIndexAnnoy *>(index_helper.get()))
-                        condition = annoy->createIndexCondition(*info, context);
+                        condition = annoy->createIndexCondition(get_query_info(), context);
 #endif
 #ifdef ENABLE_USEARCH
                     if (const auto * usearch = typeid_cast<const MergeTreeIndexUSearch *>(index_helper.get()))
-                        condition = usearch->createIndexCondition(*info, context);
+                        condition = usearch->createIndexCondition(get_query_info(), context);
 #endif
                     if (!condition)
                         throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name);
diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 1cc672fb98f..d5922ae1bc2 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -762,92 +762,6 @@ void KeyCondition::getAllSpaceFillingCurves()
     }
 }
 
-KeyCondition::KeyCondition(
-    const ASTPtr & query,
-    const ASTs & additional_filter_asts,
-    Block block_with_constants,
-    PreparedSetsPtr prepared_sets,
-    ContextPtr context,
-    const Names & key_column_names,
-    const ExpressionActionsPtr & key_expr_,
-    NameSet array_joined_column_names_,
-    bool single_point_,
-    bool strict_)
-    : key_expr(key_expr_)
-    , key_subexpr_names(getAllSubexpressionNames(*key_expr))
-    , array_joined_column_names(std::move(array_joined_column_names_))
-    , single_point(single_point_)
-    , strict(strict_)
-{
-    size_t key_index = 0;
-    for (const auto & name : key_column_names)
-    {
-        if (!key_columns.contains(name))
-        {
-            key_columns[name] = key_columns.size();
-            key_indices.push_back(key_index);
-        }
-        ++key_index;
-    }
-
-    if (context->getSettingsRef().analyze_index_with_space_filling_curves)
-        getAllSpaceFillingCurves();
-
-    ASTPtr filter_node;
-    if (query)
-        filter_node = buildFilterNode(query, additional_filter_asts);
-
-    if (!filter_node)
-    {
-        has_filter = false;
-        rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
-        return;
-    }
-
-    has_filter = true;
-
-    /** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
-      * the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
-      * This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
-      * when parsing the AST into internal RPN representation.
-      * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
-      * are pushed down and applied (when possible) to leaf nodes.
-      */
-    auto inverted_filter_node = DB::cloneASTWithInversionPushDown(filter_node);
-
-    RPNBuilder<RPNElement> builder(
-        inverted_filter_node,
-        std::move(context),
-        std::move(block_with_constants),
-        std::move(prepared_sets),
-        [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
-
-    rpn = std::move(builder).extractRPN();
-
-    findHyperrectanglesForArgumentsOfSpaceFillingCurves();
-}
-
-KeyCondition::KeyCondition(
-    const SelectQueryInfo & query_info,
-    ContextPtr context,
-    const Names & key_column_names,
-    const ExpressionActionsPtr & key_expr_,
-    bool single_point_,
-    bool strict_)
-    : KeyCondition(
-        query_info.query,
-        query_info.filter_asts,
-        KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context),
-        query_info.prepared_sets,
-        context,
-        key_column_names,
-        key_expr_,
-        query_info.syntax_analyzer_result ? query_info.syntax_analyzer_result->getArrayJoinSourceNameSet() : NameSet{},
-        single_point_,
-        strict_)
-{
-}
-
 KeyCondition::KeyCondition(
     ActionsDAGPtr filter_dag,
     ContextPtr context,
@@ -883,6 +797,13 @@ KeyCondition::KeyCondition(
 
     has_filter = true;
 
+    /** When non-strictly monotonic functions are employed in a functional index (e.g. ORDER BY toStartOfHour(dateTime)),
+      * the use of the NOT operator in a predicate will result in the indexing algorithm leaving out some data.
+      * This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
+      * when parsing the AST into internal RPN representation.
+      * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
+      * are pushed down and applied (when possible) to leaf nodes.
+      */
     auto inverted_dag = cloneASTWithInversionPushDown({filter_dag->getOutputs().at(0)}, context);
     assert(inverted_dag->getOutputs().size() == 1);
 
diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h
index e291eb7b98c..6e248dd664a 100644
--- a/src/Storages/MergeTree/KeyCondition.h
+++ b/src/Storages/MergeTree/KeyCondition.h
@@ -38,31 +38,6 @@ struct ActionDAGNodes;
   */
 class KeyCondition
 {
-private:
-    /// Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters
-    KeyCondition(
-        const ASTPtr & query,
-        const ASTs & additional_filter_asts,
-        Block block_with_constants,
-        PreparedSetsPtr prepared_sets_,
-        ContextPtr context,
-        const Names & key_column_names,
-        const ExpressionActionsPtr & key_expr,
-        NameSet array_joined_column_names,
-        bool single_point_ = false,
-        bool strict_ = false);
-
-    /** Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters.
-      * Select query, additional filters, prepared sets are initialized using query info.
-      */
-    KeyCondition(
-        const SelectQueryInfo & query_info,
-        ContextPtr context,
-        const Names & key_column_names,
-        const ExpressionActionsPtr & key_expr_,
-        bool single_point_ = false,
-        bool strict_ = false);
-
 public:
     /// Construct key condition from ActionsDAG nodes
     KeyCondition(
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 80e0e430d19..0a9dd49c621 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1083,17 +1083,20 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
     Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */);
 
     // Generate valid expressions for filtering
-    auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), virtual_columns_block);
+    bool valid = true;
+    for (const auto * input : filter_actions_dag->getInputs())
+        if (!virtual_columns_block.has(input->result_name))
+            valid = false;
 
     PartitionPruner partition_pruner(metadata_snapshot, filter_actions_dag, local_context, true /* strict */);
-    if (partition_pruner.isUseless() && !filter_dag)
+    if (partition_pruner.isUseless() && !valid)
         return {};
 
     std::unordered_set<String> part_values;
-    if (filter_dag)
+    if (valid)
     {
         virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */);
-        VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context);
+        VirtualColumnUtils::filterBlockWithDAG(filter_actions_dag, virtual_columns_block, local_context);
         part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
         if (part_values.empty())
             return 0;

From 491df7bf6e7d8321d2694d76c522f1520871326d Mon Sep 17 00:00:00 2001
From: Jihyuk Bok <jihyuk.bok@clickhouse.com>
Date: Thu, 4 Jan 2024 18:46:55 +0100
Subject: [PATCH 164/204] enable ordinary databases while restoration

---
 src/Backups/RestorerFromBackup.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index 4e580e493a7..a33773f19ab 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -573,11 +573,12 @@ void RestorerFromBackup::createDatabase(const String & database_name) const
     create_database_query->if_not_exists = (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists);
 
     LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query));
-
+    auto query_context = Context::createCopy(context);
+    query_context->setSetting("allow_deprecated_database_ordinary", 1);
     try
     {
         /// Execute CREATE DATABASE query.
-        InterpreterCreateQuery interpreter{create_database_query, context};
+        InterpreterCreateQuery interpreter{create_database_query, query_context};
         interpreter.setInternal(true);
         interpreter.execute();
     }

From bc1c05e4cd4492d6e8735345000d3a50809047d1 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Thu, 4 Jan 2024 20:15:52 +0100
Subject: [PATCH 165/204] Update 02486_truncate_and_unexpected_parts.sql

---
 .../queries/0_stateless/02486_truncate_and_unexpected_parts.sql  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
index 755cba2a155..5c90313b6b8 100644
--- a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
+++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
@@ -59,6 +59,7 @@ insert into rmt3 values (2);
 insert into rmt3 values (3);
 
 system stop cleanup rmt3;
+system sync replica rmt3 pull;
 alter table rmt3 drop part 'all_1_1_0';
 optimize table rmt3 final;
 

From 2a385bc573879a45b2ca8332d4abb91c9b862609 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Thu, 4 Jan 2024 18:26:25 +0100
Subject: [PATCH 166/204] Fix currentProfiles()

---
 src/Access/SettingsProfilesCache.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp
index 9f4fc5a5d89..275b3aeb6b5 100644
--- a/src/Access/SettingsProfilesCache.cpp
+++ b/src/Access/SettingsProfilesCache.cpp
@@ -140,7 +140,6 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
 
     auto info = std::make_shared<SettingsProfilesInfo>(access_control);
 
-    info->profiles = merged_settings.toProfileIDs();
     substituteProfiles(merged_settings, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
 
     info->settings = merged_settings.toSettingsChanges();
@@ -156,6 +155,8 @@ void SettingsProfilesCache::substituteProfiles(
     std::vector<UUID> & substituted_profiles,
     std::unordered_map<UUID, String> & names_of_substituted_profiles) const
 {
+    profiles = elements.toProfileIDs();
+
     /// We should substitute profiles in reversive order because the same profile can occur
     /// in `elements` multiple times (with some other settings in between) and in this case
     /// the last occurrence should override all the previous ones.
@@ -231,12 +232,12 @@ std::shared_ptr<const SettingsProfilesInfo> SettingsProfilesCache::getSettingsPr
     if (auto pos = this->profile_infos_cache.get(profile_id))
         return *pos;
 
-    SettingsProfileElements elements = all_profiles[profile_id]->elements;
+    SettingsProfileElements elements;
+    auto & element = elements.emplace_back();
+    element.parent_profile = profile_id;
 
     auto info = std::make_shared<SettingsProfilesInfo>(access_control);
 
-    info->profiles.push_back(profile_id);
-    info->profiles_with_implicit.push_back(profile_id);
     substituteProfiles(elements, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
     info->settings = elements.toSettingsChanges();
     info->constraints.merge(elements.toSettingsConstraints(access_control));

From 362133af1f25143eccc290734847fad15dbbbce6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Fri, 5 Jan 2024 01:54:05 +0100
Subject: [PATCH 167/204] Update tests

---
 .../00918_json_functions.reference            |  4 +--
 .../0_stateless/01825_type_json_10.reference  |  2 +-
 .../0_stateless/01825_type_json_11.reference  |  2 +-
 .../0_stateless/01825_type_json_12.reference  |  2 +-
 .../0_stateless/01825_type_json_13.reference  |  2 +-
 .../0_stateless/01825_type_json_15.reference  |  2 +-
 .../0_stateless/01825_type_json_16.reference  |  2 +-
 .../0_stateless/01825_type_json_17.reference  |  6 ++---
 .../0_stateless/01825_type_json_18.reference  |  4 +--
 .../0_stateless/01825_type_json_2.reference   | 22 ++++++++--------
 .../0_stateless/01825_type_json_4.reference   |  2 +-
 .../0_stateless/01825_type_json_5.reference   |  2 +-
 .../0_stateless/01825_type_json_6.reference   |  2 +-
 .../0_stateless/01825_type_json_7.reference   |  2 +-
 .../0_stateless/01825_type_json_8.reference   |  4 +--
 .../0_stateless/01825_type_json_9.reference   |  2 +-
 .../01825_type_json_bools.reference           |  2 +-
 .../0_stateless/01825_type_json_btc.reference |  2 +-
 .../01825_type_json_describe.reference        |  4 +--
 .../01825_type_json_distributed.reference     |  4 +--
 .../01825_type_json_field.reference           |  6 ++---
 .../01825_type_json_from_map.reference        |  2 +-
 .../01825_type_json_in_array.reference        |  4 +--
 .../01825_type_json_in_other_types.reference  |  2 +-
 .../01825_type_json_insert_select.reference   | 10 +++----
 .../01825_type_json_missed_values.reference   |  2 +-
 .../01825_type_json_multiple_files.reference  |  6 ++---
 .../01825_type_json_nbagames.reference        |  2 +-
 .../01825_type_json_nullable.reference        | 16 ++++++------
 .../01825_type_json_parallel_insert.reference |  2 +-
 ...01825_type_json_schema_inference.reference |  2 +-
 .../02149_external_schema_inference.reference | 26 +++++++++----------
 .../02149_schema_inference.reference          | 10 +++----
 ..._inference_formats_with_schema_1.reference | 24 ++++++++---------
 ...arquet_nullable_schema_inference.reference |  8 +++---
 .../0_stateless/02246_flatten_tuple.reference |  4 +--
 .../02287_type_object_convert.reference       | 18 ++++++-------
 .../02313_avro_records_and_maps.reference     |  8 +++---
 .../02314_avro_null_as_default.reference      |  2 +-
 .../02325_dates_schema_inference.reference    |  4 +--
 ..._infer_integers_schema_inference.reference |  6 ++---
 .../02421_type_json_empty_parts.reference     | 10 +++----
 .../02521_avro_union_null_nested.reference    |  2 +-
 .../02522_avro_complicate_schema.reference    |  2 +-
 ...2900_union_schema_inference_mode.reference |  8 +++---
 .../02906_orc_tuple_field_prune.reference     |  6 ++---
 46 files changed, 133 insertions(+), 133 deletions(-)

diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference
index be8e603f8dc..5264d51fa73 100644
--- a/tests/queries/0_stateless/00918_json_functions.reference
+++ b/tests/queries/0_stateless/00918_json_functions.reference
@@ -69,8 +69,8 @@ hello
 123456.1234	Decimal(20, 4)
 123456.1234	Decimal(20, 4)
 123456789012345.12	Decimal(30, 4)
-(1234567890.1234567890123456789,'test')	Tuple(a Decimal(35, 20), b LowCardinality(String))
-(1234567890.12345678901234567890123456789,'test')	Tuple(a Decimal(45, 30), b LowCardinality(String))
+(1234567890.1234567890123456789,'test')	Tuple(\n    a Decimal(35, 20),\n    b LowCardinality(String))
+(1234567890.12345678901234567890123456789,'test')	Tuple(\n    a Decimal(45, 30),\n    b LowCardinality(String))
 123456789012345.1136	123456789012345.1136
 1234567890.12345677879616925706	(1234567890.12345677879616925706,'test')
 1234567890.123456695758468374595199311875	(1234567890.123456695758468374595199311875,'test')
diff --git a/tests/queries/0_stateless/01825_type_json_10.reference b/tests/queries/0_stateless/01825_type_json_10.reference
index 53fe604fa51..4161fb59c93 100644
--- a/tests/queries/0_stateless/01825_type_json_10.reference
+++ b/tests/queries/0_stateless/01825_type_json_10.reference
@@ -1,4 +1,4 @@
-Tuple(a Tuple(b Int8, c Nested(d Int8, e Array(Int16), f Int8)))
+Tuple(\n    a Tuple(\n        b Int8,\n        c Nested(d Int8, e Array(Int16), f Int8)))
 {"o":{"a":{"b":1,"c":[{"d":10,"e":[31],"f":0},{"d":20,"e":[63,127],"f":0}]}}}
 {"o":{"a":{"b":2,"c":[]}}}
 {"o":{"a":{"b":3,"c":[{"d":0,"e":[32],"f":20},{"d":0,"e":[64,128],"f":30}]}}}
diff --git a/tests/queries/0_stateless/01825_type_json_11.reference b/tests/queries/0_stateless/01825_type_json_11.reference
index 27569620cd7..0575743e019 100644
--- a/tests/queries/0_stateless/01825_type_json_11.reference
+++ b/tests/queries/0_stateless/01825_type_json_11.reference
@@ -1,4 +1,4 @@
-Tuple(id Int8, key_1 Nested(key_2 Int32, key_3 Nested(key_4 Nested(key_5 Int8), key_7 Int16)))
+Tuple(\n    id Int8,\n    key_1 Nested(key_2 Int32, key_3 Nested(key_4 Nested(key_5 Int8), key_7 Int16)))
 {"obj":{"id":1,"key_1":[{"key_2":100,"key_3":[{"key_4":[{"key_5":-2}],"key_7":257}]},{"key_2":65536,"key_3":[]}]}}
 {"obj":{"id":2,"key_1":[{"key_2":101,"key_3":[{"key_4":[{"key_5":-2}],"key_7":0}]},{"key_2":102,"key_3":[{"key_4":[],"key_7":257}]},{"key_2":65536,"key_3":[]}]}}
 {"obj.key_1.key_3":[[{"key_4":[{"key_5":-2}],"key_7":257}],[]]}
diff --git a/tests/queries/0_stateless/01825_type_json_12.reference b/tests/queries/0_stateless/01825_type_json_12.reference
index 7f4f5bf190e..ff60ba33f94 100644
--- a/tests/queries/0_stateless/01825_type_json_12.reference
+++ b/tests/queries/0_stateless/01825_type_json_12.reference
@@ -1,3 +1,3 @@
-Tuple(id Int8, key_0 Nested(key_1 Nested(key_3 Nested(key_4 String, key_5 Float64, key_6 String, key_7 Float64))))
+Tuple(\n    id Int8,\n    key_0 Nested(key_1 Nested(key_3 Nested(key_4 String, key_5 Float64, key_6 String, key_7 Float64))))
 {"obj":{"id":1,"key_0":[{"key_1":[{"key_3":[{"key_4":"1048576","key_5":0.0001048576,"key_6":"25.5","key_7":1025},{"key_4":"","key_5":0,"key_6":"","key_7":2}]}]},{"key_1":[]},{"key_1":[{"key_3":[{"key_4":"","key_5":-1,"key_6":"aqbjfiruu","key_7":-922337203685477600},{"key_4":"","key_5":0,"key_6":"","key_7":65537}]},{"key_3":[{"key_4":"ghdqyeiom","key_5":1048575,"key_6":"","key_7":21474836.48}]}]}]}}
 [[['1048576','']],[],[['',''],['ghdqyeiom']]]	[[[0.0001048576,0]],[],[[-1,0],[1048575]]]	[[['25.5','']],[],[['aqbjfiruu',''],['']]]	[[[1025,2]],[],[[-922337203685477600,65537],[21474836.48]]]
diff --git a/tests/queries/0_stateless/01825_type_json_13.reference b/tests/queries/0_stateless/01825_type_json_13.reference
index e420021f406..fa105f1a4c6 100644
--- a/tests/queries/0_stateless/01825_type_json_13.reference
+++ b/tests/queries/0_stateless/01825_type_json_13.reference
@@ -1,3 +1,3 @@
-Tuple(id Int8, key_1 Nested(key_2 Nested(key_3 Nested(key_4 Nested(key_5 Float64, key_6 Int64, key_7 Int32), key_8 Int32))))
+Tuple(\n    id Int8,\n    key_1 Nested(key_2 Nested(key_3 Nested(key_4 Nested(key_5 Float64, key_6 Int64, key_7 Int32), key_8 Int32))))
 {"obj":{"id":1,"key_1":[{"key_2":[{"key_3":[{"key_4":[],"key_8":65537},{"key_4":[{"key_5":-0.02,"key_6":"0","key_7":0},{"key_5":0,"key_6":"0","key_7":1023},{"key_5":0,"key_6":"9223372036854775807","key_7":1}],"key_8":0},{"key_4":[{"key_5":0,"key_6":"0","key_7":65537}],"key_8":0}]}]}]}}
 [[[65537,0,0]]]	[[[[],[-0.02,0,0],[0]]]]	[[[[],[0,0,9223372036854775807],[0]]]]	[[[[],[0,1023,1],[65537]]]]
diff --git a/tests/queries/0_stateless/01825_type_json_15.reference b/tests/queries/0_stateless/01825_type_json_15.reference
index ab4b1b82877..4f13731d35a 100644
--- a/tests/queries/0_stateless/01825_type_json_15.reference
+++ b/tests/queries/0_stateless/01825_type_json_15.reference
@@ -1,3 +1,3 @@
-Tuple(id Int8, key_0 Nested(key_0 Float64, key_1 Tuple(key_2 Array(Int8), key_8 String), key_10 Float64))
+Tuple(\n    id Int8,\n    key_0 Nested(key_0 Float64, key_1 Tuple(key_2 Array(Int8), key_8 String), key_10 Float64))
 {"obj":{"id":1,"key_0":[{"key_0":-1,"key_1":{"key_2":[1,2,3],"key_8":"sffjx"},"key_10":65535},{"key_0":922337203.685,"key_1":{"key_2":[],"key_8":""},"key_10":10.23}]}}
 [[1,2,3],[]]	['sffjx','']	[65535,10.23]	[-1,922337203.685]
diff --git a/tests/queries/0_stateless/01825_type_json_16.reference b/tests/queries/0_stateless/01825_type_json_16.reference
index f40f0d747d5..a8cc682f8e1 100644
--- a/tests/queries/0_stateless/01825_type_json_16.reference
+++ b/tests/queries/0_stateless/01825_type_json_16.reference
@@ -1,3 +1,3 @@
-Tuple(id Int8, key_0 Nested(key_1 Nested(key_2 Tuple(key_3 Nested(key_4 Int32, key_6 Int8, key_7 Int16), key_5 Nested(key_6 Int8, key_7 String)))))
+Tuple(\n    id Int8,\n    key_0 Nested(key_1 Nested(key_2 Tuple(key_3 Nested(key_4 Int32, key_6 Int8, key_7 Int16), key_5 Nested(key_6 Int8, key_7 String)))))
 {"obj":{"id":1,"key_0":[{"key_1":[{"key_2":{"key_3":[{"key_4":255,"key_6":0,"key_7":0},{"key_4":65535,"key_6":0,"key_7":0},{"key_4":0,"key_6":3,"key_7":255}],"key_5":[{"key_6":1,"key_7":"nnpqx"},{"key_6":3,"key_7":"255"}]}}]}]}}
 [[[255,65535,0]]]	[[[0,0,3]]]	[[[0,0,255]]]	[[[1,3]]]	[[['nnpqx','255']]]
diff --git a/tests/queries/0_stateless/01825_type_json_17.reference b/tests/queries/0_stateless/01825_type_json_17.reference
index 0f97bfed5bc..c830cf41cf1 100644
--- a/tests/queries/0_stateless/01825_type_json_17.reference
+++ b/tests/queries/0_stateless/01825_type_json_17.reference
@@ -1,4 +1,4 @@
-Tuple(arr Nested(k1 Nested(k2 String, k3 String, k4 Int8), k5 Tuple(k6 String)), id Int8)
+Tuple(\n    arr Nested(k1 Nested(k2 String, k3 String, k4 Int8), k5 Tuple(k6 String)),\n    id Int8)
 {"obj":{"arr":[{"k1":[{"k2":"aaa","k3":"bbb","k4":0},{"k2":"ccc","k3":"","k4":0}],"k5":{"k6":""}}],"id":1}}
 {"obj":{"arr":[{"k1":[{"k2":"","k3":"ddd","k4":10},{"k2":"","k3":"","k4":20}],"k5":{"k6":"foo"}}],"id":2}}
 [['bbb','']]	[['aaa','ccc']]
@@ -6,7 +6,7 @@ Tuple(arr Nested(k1 Nested(k2 String, k3 String, k4 Int8), k5 Tuple(k6 String)),
 1
 [[0,0]]
 [[10,20]]
-Tuple(arr Nested(k1 Nested(k2 String, k3 Nested(k4 Int8))), id Int8)
+Tuple(\n    arr Nested(k1 Nested(k2 String, k3 Nested(k4 Int8))),\n    id Int8)
 {"obj":{"arr":[{"k1":[{"k2":"aaa","k3":[]}]}],"id":1}}
 {"obj":{"arr":[{"k1":[{"k2":"bbb","k3":[{"k4":10}]},{"k2":"ccc","k3":[{"k4":20}]}]}],"id":2}}
 [['aaa']]	[[[]]]
@@ -14,7 +14,7 @@ Tuple(arr Nested(k1 Nested(k2 String, k3 Nested(k4 Int8))), id Int8)
 1
 [[[]]]
 [[[10],[20]]]
-Tuple(arr Nested(k1 Nested(k2 String, k4 Nested(k5 Int8)), k3 String), id Int8)
+Tuple(\n    arr Nested(k1 Nested(k2 String, k4 Nested(k5 Int8)), k3 String),\n    id Int8)
 {"obj":{"arr":[{"k1":[],"k3":"qqq"},{"k1":[],"k3":"www"}],"id":1}}
 {"obj":{"arr":[{"k1":[{"k2":"aaa","k4":[]}],"k3":"eee"}],"id":2}}
 {"obj":{"arr":[{"k1":[{"k2":"bbb","k4":[{"k5":10}]},{"k2":"ccc","k4":[{"k5":20}]}],"k3":"rrr"}],"id":3}}
diff --git a/tests/queries/0_stateless/01825_type_json_18.reference b/tests/queries/0_stateless/01825_type_json_18.reference
index d93f9bda63c..d61baf5eb6f 100644
--- a/tests/queries/0_stateless/01825_type_json_18.reference
+++ b/tests/queries/0_stateless/01825_type_json_18.reference
@@ -1,2 +1,2 @@
-1	(1)	Tuple(k1 Int8)
-1	([1,2])	Tuple(k1 Array(Int8))
+1	(1)	Tuple(\n    k1 Int8)
+1	([1,2])	Tuple(\n    k1 Array(Int8))
diff --git a/tests/queries/0_stateless/01825_type_json_2.reference b/tests/queries/0_stateless/01825_type_json_2.reference
index 8524035a3a4..790d825a894 100644
--- a/tests/queries/0_stateless/01825_type_json_2.reference
+++ b/tests/queries/0_stateless/01825_type_json_2.reference
@@ -1,24 +1,24 @@
-1	(1,2,0)	Tuple(k1 Int8, k2 Int8, k3 Int8)
-2	(0,3,4)	Tuple(k1 Int8, k2 Int8, k3 Int8)
+1	(1,2,0)	Tuple(\n    k1 Int8,\n    k2 Int8,\n    k3 Int8)
+2	(0,3,4)	Tuple(\n    k1 Int8,\n    k2 Int8,\n    k3 Int8)
 1	1	2	0
 2	0	3	4
-1	(1,2,'0')	Tuple(k1 Int8, k2 Int8, k3 String)
-2	(0,3,'4')	Tuple(k1 Int8, k2 Int8, k3 String)
-3	(0,0,'10')	Tuple(k1 Int8, k2 Int8, k3 String)
-4	(0,5,'str')	Tuple(k1 Int8, k2 Int8, k3 String)
+1	(1,2,'0')	Tuple(\n    k1 Int8,\n    k2 Int8,\n    k3 String)
+2	(0,3,'4')	Tuple(\n    k1 Int8,\n    k2 Int8,\n    k3 String)
+3	(0,0,'10')	Tuple(\n    k1 Int8,\n    k2 Int8,\n    k3 String)
+4	(0,5,'str')	Tuple(\n    k1 Int8,\n    k2 Int8,\n    k3 String)
 1	1	2	0
 2	0	3	4
 3	0	0	10
 4	0	5	str
 ============
-1	([1,2,3.3])	Tuple(k1 Array(Float64))
+1	([1,2,3.3])	Tuple(\n    k1 Array(Float64))
 1	[1,2,3.3]
-1	(['1','2','3.3'])	Tuple(k1 Array(String))
-2	(['a','4','b'])	Tuple(k1 Array(String))
+1	(['1','2','3.3'])	Tuple(\n    k1 Array(String))
+2	(['a','4','b'])	Tuple(\n    k1 Array(String))
 1	['1','2','3.3']
 2	['a','4','b']
 ============
-1	([(11,0,0),(0,22,0)])	Tuple(k1 Nested(k2 Int8, k3 Int8, k4 Int8))
-2	([(0,33,0),(0,0,44),(0,55,66)])	Tuple(k1 Nested(k2 Int8, k3 Int8, k4 Int8))
+1	([(11,0,0),(0,22,0)])	Tuple(\n    k1 Nested(k2 Int8, k3 Int8, k4 Int8))
+2	([(0,33,0),(0,0,44),(0,55,66)])	Tuple(\n    k1 Nested(k2 Int8, k3 Int8, k4 Int8))
 1	[11,0]	[0,22]	[0,0]
 2	[0,0,0]	[33,0,55]	[0,44,66]
diff --git a/tests/queries/0_stateless/01825_type_json_4.reference b/tests/queries/0_stateless/01825_type_json_4.reference
index 1b23bf2213e..58b1d067a2b 100644
--- a/tests/queries/0_stateless/01825_type_json_4.reference
+++ b/tests/queries/0_stateless/01825_type_json_4.reference
@@ -1,5 +1,5 @@
 Code: 645
 Code: 15
 Code: 53
-1	('v1')	Tuple(k1 String)
+1	('v1')	Tuple(\n    k1 String)
 1	v1
diff --git a/tests/queries/0_stateless/01825_type_json_5.reference b/tests/queries/0_stateless/01825_type_json_5.reference
index 4ac0aa26ffd..3c21f2840a2 100644
--- a/tests/queries/0_stateless/01825_type_json_5.reference
+++ b/tests/queries/0_stateless/01825_type_json_5.reference
@@ -2,4 +2,4 @@
 {"s":{"a.b":1,"a.c":2}}
 1		[22,33]
 2	qqq	[44]
-Tuple(k1 Int8, k2 Tuple(k3 String, k4 Array(Int8)))
+Tuple(\n    k1 Int8,\n    k2 Tuple(\n        k3 String,\n        k4 Array(Int8)))
diff --git a/tests/queries/0_stateless/01825_type_json_6.reference b/tests/queries/0_stateless/01825_type_json_6.reference
index 7fcd2a40826..15e1ab3ac80 100644
--- a/tests/queries/0_stateless/01825_type_json_6.reference
+++ b/tests/queries/0_stateless/01825_type_json_6.reference
@@ -1,3 +1,3 @@
-Tuple(key String, out Nested(outputs Nested(index Int32, n Int8), type Int8, value Int8))
+Tuple(\n    key String,\n    out Nested(outputs Nested(index Int32, n Int8), type Int8, value Int8))
 v1	[0,0]	[1,2]	[[],[1960131]]	[[],[0]]
 v2	[1,1]	[4,3]	[[1881212],[]]	[[1],[]]
diff --git a/tests/queries/0_stateless/01825_type_json_7.reference b/tests/queries/0_stateless/01825_type_json_7.reference
index 263f1688a91..cf6b32d73e8 100644
--- a/tests/queries/0_stateless/01825_type_json_7.reference
+++ b/tests/queries/0_stateless/01825_type_json_7.reference
@@ -1,4 +1,4 @@
-Tuple(categories Array(String), key String)
+Tuple(\n    categories Array(String),\n    key String)
 v1	[]
 v2	['foo','bar']
 v3	[]
diff --git a/tests/queries/0_stateless/01825_type_json_8.reference b/tests/queries/0_stateless/01825_type_json_8.reference
index b64e6d0c9b9..27770317862 100644
--- a/tests/queries/0_stateless/01825_type_json_8.reference
+++ b/tests/queries/0_stateless/01825_type_json_8.reference
@@ -1,2 +1,2 @@
-([[(1,2),(3,4)],[(5,6)]])	Tuple(k1 Array(Nested(k2 Int8, k3 Int8)))
-([([1,3,4,5],[6,7]),([8],[9,10,11])])	Tuple(k1 Nested(k2 Array(Int8), k3 Array(Int8)))
+([[(1,2),(3,4)],[(5,6)]])	Tuple(\n    k1 Array(Nested(k2 Int8, k3 Int8)))
+([([1,3,4,5],[6,7]),([8],[9,10,11])])	Tuple(\n    k1 Nested(k2 Array(Int8), k3 Array(Int8)))
diff --git a/tests/queries/0_stateless/01825_type_json_9.reference b/tests/queries/0_stateless/01825_type_json_9.reference
index a426b09a100..f58a64eda5a 100644
--- a/tests/queries/0_stateless/01825_type_json_9.reference
+++ b/tests/queries/0_stateless/01825_type_json_9.reference
@@ -1 +1 @@
-Tuple(foo Int8, k1 Int8, k2 Int8)
+Tuple(\n    foo Int8,\n    k1 Int8,\n    k2 Int8)
diff --git a/tests/queries/0_stateless/01825_type_json_bools.reference b/tests/queries/0_stateless/01825_type_json_bools.reference
index bed8c2ad2c3..6b4d2382dc2 100644
--- a/tests/queries/0_stateless/01825_type_json_bools.reference
+++ b/tests/queries/0_stateless/01825_type_json_bools.reference
@@ -1 +1 @@
-(1,0)	Tuple(k1 UInt8, k2 UInt8)
+(1,0)	Tuple(\n    k1 UInt8,\n    k2 UInt8)
diff --git a/tests/queries/0_stateless/01825_type_json_btc.reference b/tests/queries/0_stateless/01825_type_json_btc.reference
index cee3b31a798..e85c0ef45bd 100644
--- a/tests/queries/0_stateless/01825_type_json_btc.reference
+++ b/tests/queries/0_stateless/01825_type_json_btc.reference
@@ -1,5 +1,5 @@
 100
-data	Tuple(double_spend UInt8, fee Int32, hash String, inputs Nested(index Int8, prev_out Tuple(addr String, n Int16, script String, spending_outpoints Nested(n Int8, tx_index Int64), spent UInt8, tx_index Int64, type Int8, value Int64), script String, sequence Int64, witness String), lock_time Int32, out Nested(addr String, n Int8, script String, spending_outpoints Nested(n Int8, tx_index Int64), spent UInt8, tx_index Int64, type Int8, value Int64), rbf UInt8, relayed_by String, size Int16, time Int32, tx_index Int64, ver Int8, vin_sz Int8, vout_sz Int8, weight Int16)					
+data	Tuple(\n    double_spend UInt8,\n    fee Int32,\n    hash String,\n    inputs Nested(index Int8, prev_out Tuple(addr String, n Int16, script String, spending_outpoints Nested(n Int8, tx_index Int64), spent UInt8, tx_index Int64, type Int8, value Int64), script String, sequence Int64, witness String),\n    lock_time Int32,\n    out Nested(addr String, n Int8, script String, spending_outpoints Nested(n Int8, tx_index Int64), spent UInt8, tx_index Int64, type Int8, value Int64),\n    rbf UInt8,\n    relayed_by String,\n    size Int16,\n    time Int32,\n    tx_index Int64,\n    ver Int8,\n    vin_sz Int8,\n    vout_sz Int8,\n    weight Int16)					
 8174.56	2680
 2.32	1
 [[],[(0,359661801933760)]]
diff --git a/tests/queries/0_stateless/01825_type_json_describe.reference b/tests/queries/0_stateless/01825_type_json_describe.reference
index 629b60cb629..98b2bf8be83 100644
--- a/tests/queries/0_stateless/01825_type_json_describe.reference
+++ b/tests/queries/0_stateless/01825_type_json_describe.reference
@@ -1,3 +1,3 @@
 data	Object(\'json\')					
-data	Tuple(k1 Int8)					
-data	Tuple(k1 String, k2 Array(Int8))					
+data	Tuple(\n    k1 Int8)					
+data	Tuple(\n    k1 String,\n    k2 Array(Int8))					
diff --git a/tests/queries/0_stateless/01825_type_json_distributed.reference b/tests/queries/0_stateless/01825_type_json_distributed.reference
index 9ae85ac888c..9735fec2fe5 100644
--- a/tests/queries/0_stateless/01825_type_json_distributed.reference
+++ b/tests/queries/0_stateless/01825_type_json_distributed.reference
@@ -1,4 +1,4 @@
-(2,('qqq',[44,55]))	Tuple(k1 Int8, k2 Tuple(k3 String, k4 Array(Int8)))
-(2,('qqq',[44,55]))	Tuple(k1 Int8, k2 Tuple(k3 String, k4 Array(Int8)))
+(2,('qqq',[44,55]))	Tuple(\n    k1 Int8,\n    k2 Tuple(\n        k3 String,\n        k4 Array(Int8)))
+(2,('qqq',[44,55]))	Tuple(\n    k1 Int8,\n    k2 Tuple(\n        k3 String,\n        k4 Array(Int8)))
 2	qqq	[44,55]
 2	qqq	[44,55]
diff --git a/tests/queries/0_stateless/01825_type_json_field.reference b/tests/queries/0_stateless/01825_type_json_field.reference
index b5637b1fbb7..8afd0110b63 100644
--- a/tests/queries/0_stateless/01825_type_json_field.reference
+++ b/tests/queries/0_stateless/01825_type_json_field.reference
@@ -1,12 +1,12 @@
 1	10	a
-Tuple(a UInt8, s String)
+Tuple(\n    a UInt8,\n    s String)
 1	10	a	0
 2	sss	b	300
 3	20	c	0
-Tuple(a String, b UInt16, s String)
+Tuple(\n    a String,\n    b UInt16,\n    s String)
 1	10	a	0	
 2	sss	b	300	
 3	20	c	0	
 4	30		400	
 5	0	qqq	0	foo
-Tuple(a String, b UInt16, s String, t String)
+Tuple(\n    a String,\n    b UInt16,\n    s String,\n    t String)
diff --git a/tests/queries/0_stateless/01825_type_json_from_map.reference b/tests/queries/0_stateless/01825_type_json_from_map.reference
index dbcf67faef3..90680ee383b 100644
--- a/tests/queries/0_stateless/01825_type_json_from_map.reference
+++ b/tests/queries/0_stateless/01825_type_json_from_map.reference
@@ -1,4 +1,4 @@
 800000	2000000	1400000	900000
 800000	2000000	1400000	900000
-Tuple(col0 UInt64, col1 UInt64, col2 UInt64, col3 UInt64, col4 UInt64, col5 UInt64, col6 UInt64, col7 UInt64, col8 UInt64)
+Tuple(\n    col0 UInt64,\n    col1 UInt64,\n    col2 UInt64,\n    col3 UInt64,\n    col4 UInt64,\n    col5 UInt64,\n    col6 UInt64,\n    col7 UInt64,\n    col8 UInt64)
 1600000	4000000	2800000	1800000
diff --git a/tests/queries/0_stateless/01825_type_json_in_array.reference b/tests/queries/0_stateless/01825_type_json_in_array.reference
index c36a22e6951..82207f53a21 100644
--- a/tests/queries/0_stateless/01825_type_json_in_array.reference
+++ b/tests/queries/0_stateless/01825_type_json_in_array.reference
@@ -5,7 +5,7 @@
 {"arr":{"k1":1,"k2":{"k3":2,"k4":3,"k5":""}}}
 {"arr":{"k1":2,"k2":{"k3":0,"k4":0,"k5":"foo"}}}
 {"arr":{"k1":3,"k2":{"k3":4,"k4":5,"k5":""}}}
-Array(Tuple(k1 Int8, k2 Tuple(k3 Int8, k4 Int8, k5 String)))
+Array(Tuple(\n    k1 Int8,\n    k2 Tuple(\n        k3 Int8,\n        k4 Int8,\n        k5 String)))
 {"id":1,"arr":[{"k1":[{"k2":"aaa","k3":"bbb","k4":0},{"k2":"ccc","k3":"","k4":0}],"k5":{"k6":""}}]}
 {"id":2,"arr":[{"k1":[{"k2":"","k3":"ddd","k4":10},{"k2":"","k3":"","k4":20}],"k5":{"k6":"foo"}}]}
 1	[['aaa','ccc']]	[['bbb','']]	[[0,0]]	['']
@@ -14,7 +14,7 @@ Array(Tuple(k1 Int8, k2 Tuple(k3 Int8, k4 Int8, k5 String)))
 {"k1":{"k2":"","k3":"ddd","k4":10}}
 {"k1":{"k2":"aaa","k3":"bbb","k4":0}}
 {"k1":{"k2":"ccc","k3":"","k4":0}}
-Tuple(k2 String, k3 String, k4 Int8)
+Tuple(\n    k2 String,\n    k3 String,\n    k4 Int8)
 {"arr":[{"x":1}]}
 {"arr":{"x":{"y":1},"t":{"y":2}}}
 {"arr":[1,{"y":1}]}
diff --git a/tests/queries/0_stateless/01825_type_json_in_other_types.reference b/tests/queries/0_stateless/01825_type_json_in_other_types.reference
index b94885a65ab..fa8af729cc7 100644
--- a/tests/queries/0_stateless/01825_type_json_in_other_types.reference
+++ b/tests/queries/0_stateless/01825_type_json_in_other_types.reference
@@ -1,4 +1,4 @@
-Tuple(String, Map(String, Array(Tuple(k1 Nested(k2 Int8, k3 Int8, k5 String), k4 String))), Tuple(k1 String, k2 Tuple(k3 String, k4 String)))
+Tuple(String, Map(String, Array(Tuple(\n    k1 Nested(k2 Int8, k3 Int8, k5 String),\n    k4 String))), Tuple(\n    k1 String,\n    k2 Tuple(\n        k3 String,\n        k4 String)))
 =============
 {"id":1,"data":["foo",{"aa":[{"k1":[{"k2":1,"k3":2,"k5":""},{"k2":0,"k3":3,"k5":""}],"k4":""},{"k1":[{"k2":4,"k3":0,"k5":""},{"k2":0,"k3":5,"k5":""},{"k2":6,"k3":0,"k5":""}],"k4":"qqq"}],"bb":[{"k1":[],"k4":"www"},{"k1":[{"k2":7,"k3":8,"k5":""},{"k2":9,"k3":10,"k5":""},{"k2":11,"k3":12,"k5":""}],"k4":""}]},{"k1":"aa","k2":{"k3":"bb","k4":"c"}}]}
 {"id":2,"data":["bar",{"aa":[{"k1":[{"k2":13,"k3":14,"k5":""},{"k2":15,"k3":16,"k5":""}],"k4":"www"}]},{"k1":"","k2":{"k3":"","k4":""}}]}
diff --git a/tests/queries/0_stateless/01825_type_json_insert_select.reference b/tests/queries/0_stateless/01825_type_json_insert_select.reference
index 6778da508f2..cb46a9c607e 100644
--- a/tests/queries/0_stateless/01825_type_json_insert_select.reference
+++ b/tests/queries/0_stateless/01825_type_json_insert_select.reference
@@ -1,10 +1,10 @@
-Tuple(k1 Int8, k2 String)
+Tuple(\n    k1 Int8,\n    k2 String)
 1	(1,'foo')
-Tuple(k1 Int8, k2 String, k3 String)
+Tuple(\n    k1 Int8,\n    k2 String,\n    k3 String)
 1	(1,'foo','')
 2	(2,'bar','')
 3	(3,'','aaa')
-Tuple(arr Nested(k11 Int8, k22 String, k33 Int8), k1 Int8, k2 String, k3 String)
+Tuple(\n    arr Nested(k11 Int8, k22 String, k33 Int8),\n    k1 Int8,\n    k2 String,\n    k3 String)
 1	([],1,'foo','')
 2	([],2,'bar','')
 3	([],3,'','aaa')
@@ -12,7 +12,7 @@ Tuple(arr Nested(k11 Int8, k22 String, k33 Int8), k1 Int8, k2 String, k3 String)
 5	([(0,'str1',0)],0,'','')
 {"data":{"k1":1,"k10":[{"a":"1","b":"2","c":{"k11":""}},{"a":"2","b":"3","c":{"k11":""}}]}}
 {"data":{"k1":2,"k10":[{"a":"1","b":"2","c":{"k11":"haha"}}]}}
-Tuple(k1 Int8, k10 Nested(a String, b String, c Tuple(k11 String)))
+Tuple(\n    k1 Int8,\n    k10 Nested(a String, b String, c Tuple(k11 String)))
 {"data":{"k1":1,"k10":[{"a":"1","b":"2","c":{"k11":""}},{"a":"2","b":"3","c":{"k11":""}}]}}
 {"data":{"k1":2,"k10":[{"a":"1","b":"2","c":{"k11":"haha"}}]}}
-Tuple(k1 Int8, k10 Nested(a String, b String, c Tuple(k11 String)))
+Tuple(\n    k1 Int8,\n    k10 Nested(a String, b String, c Tuple(k11 String)))
diff --git a/tests/queries/0_stateless/01825_type_json_missed_values.reference b/tests/queries/0_stateless/01825_type_json_missed_values.reference
index b480493995b..2a4b3a6f671 100644
--- a/tests/queries/0_stateless/01825_type_json_missed_values.reference
+++ b/tests/queries/0_stateless/01825_type_json_missed_values.reference
@@ -1,2 +1,2 @@
-Tuple(foo Int8, k1 Int8, k2 Int8)
+Tuple(\n    foo Int8,\n    k1 Int8,\n    k2 Int8)
 1
diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.reference b/tests/queries/0_stateless/01825_type_json_multiple_files.reference
index b887abc8590..6dcdb00e139 100644
--- a/tests/queries/0_stateless/01825_type_json_multiple_files.reference
+++ b/tests/queries/0_stateless/01825_type_json_multiple_files.reference
@@ -4,11 +4,11 @@
 {"data":{"k0":0,"k1":0,"k2":0,"k3":100,"k4":0,"k5":0}}
 {"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":100,"k5":0}}
 {"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":0,"k5":100}}
-Tuple(k0 Int8, k1 Int8, k2 Int8, k3 Int8, k4 Int8, k5 Int8)
+Tuple(\n    k0 Int8,\n    k1 Int8,\n    k2 Int8,\n    k3 Int8,\n    k4 Int8,\n    k5 Int8)
 {"data":{"k0":100,"k1":0,"k2":0}}
 {"data":{"k0":0,"k1":100,"k2":0}}
 {"data":{"k0":0,"k1":0,"k2":100}}
-Tuple(k0 Int8, k1 Int8, k2 Int8)
+Tuple(\n    k0 Int8,\n    k1 Int8,\n    k2 Int8)
 {"data":{"k1":100,"k3":0}}
 {"data":{"k1":0,"k3":100}}
-Tuple(k1 Int8, k3 Int8)
+Tuple(\n    k1 Int8,\n    k3 Int8)
diff --git a/tests/queries/0_stateless/01825_type_json_nbagames.reference b/tests/queries/0_stateless/01825_type_json_nbagames.reference
index 5aa63dceb86..70df8f967f3 100644
--- a/tests/queries/0_stateless/01825_type_json_nbagames.reference
+++ b/tests/queries/0_stateless/01825_type_json_nbagames.reference
@@ -1,5 +1,5 @@
 1000
-data	Tuple(_id Tuple(`$oid` String), date Tuple(`$date` String), teams Nested(abbreviation String, city String, home UInt8, name String, players Nested(ast Int8, blk Int8, drb Int8, fg Int8, fg3 Int8, fg3_pct String, fg3a Int8, fg_pct String, fga Int8, ft Int8, ft_pct String, fta Int8, mp String, orb Int8, pf Int8, player String, pts Int8, stl Int8, tov Int8, trb Int8), results Tuple(ast Int8, blk Int8, drb Int8, fg Int8, fg3 Int8, fg3_pct String, fg3a Int8, fg_pct String, fga Int8, ft Int8, ft_pct String, fta Int8, mp Int16, orb Int8, pf Int8, pts Int16, stl Int8, tov Int8, trb Int8), score Int16, won Int8))					
+data	Tuple(\n    _id Tuple(\n        `$oid` String),\n    date Tuple(\n        `$date` String),\n    teams Nested(abbreviation String, city String, home UInt8, name String, players Nested(ast Int8, blk Int8, drb Int8, fg Int8, fg3 Int8, fg3_pct String, fg3a Int8, fg_pct String, fga Int8, ft Int8, ft_pct String, fta Int8, mp String, orb Int8, pf Int8, player String, pts Int8, stl Int8, tov Int8, trb Int8), results Tuple(ast Int8, blk Int8, drb Int8, fg Int8, fg3 Int8, fg3_pct String, fg3a Int8, fg_pct String, fga Int8, ft Int8, ft_pct String, fta Int8, mp Int16, orb Int8, pf Int8, pts Int16, stl Int8, tov Int8, trb Int8), score Int16, won Int8))					
 Boston Celtics	70
 Los Angeles Lakers	64
 Milwaukee Bucks	61
diff --git a/tests/queries/0_stateless/01825_type_json_nullable.reference b/tests/queries/0_stateless/01825_type_json_nullable.reference
index 587fb1b1bc9..597ede47615 100644
--- a/tests/queries/0_stateless/01825_type_json_nullable.reference
+++ b/tests/queries/0_stateless/01825_type_json_nullable.reference
@@ -1,17 +1,17 @@
-1	(1,2,NULL)	Tuple(k1 Nullable(Int8), k2 Nullable(Int8), k3 Nullable(Int8))
-2	(NULL,3,4)	Tuple(k1 Nullable(Int8), k2 Nullable(Int8), k3 Nullable(Int8))
+1	(1,2,NULL)	Tuple(\n    k1 Nullable(Int8),\n    k2 Nullable(Int8),\n    k3 Nullable(Int8))
+2	(NULL,3,4)	Tuple(\n    k1 Nullable(Int8),\n    k2 Nullable(Int8),\n    k3 Nullable(Int8))
 1	1	2	\N
 2	\N	3	4
-1	(1,2,NULL)	Tuple(k1 Nullable(Int8), k2 Nullable(Int8), k3 Nullable(String))
-2	(NULL,3,'4')	Tuple(k1 Nullable(Int8), k2 Nullable(Int8), k3 Nullable(String))
-3	(NULL,NULL,'10')	Tuple(k1 Nullable(Int8), k2 Nullable(Int8), k3 Nullable(String))
-4	(NULL,5,'str')	Tuple(k1 Nullable(Int8), k2 Nullable(Int8), k3 Nullable(String))
+1	(1,2,NULL)	Tuple(\n    k1 Nullable(Int8),\n    k2 Nullable(Int8),\n    k3 Nullable(String))
+2	(NULL,3,'4')	Tuple(\n    k1 Nullable(Int8),\n    k2 Nullable(Int8),\n    k3 Nullable(String))
+3	(NULL,NULL,'10')	Tuple(\n    k1 Nullable(Int8),\n    k2 Nullable(Int8),\n    k3 Nullable(String))
+4	(NULL,5,'str')	Tuple(\n    k1 Nullable(Int8),\n    k2 Nullable(Int8),\n    k3 Nullable(String))
 1	1	2	\N
 2	\N	3	4
 3	\N	\N	10
 4	\N	5	str
 ============
-1	([(11,NULL,NULL),(NULL,22,NULL)])	Tuple(k1 Nested(k2 Nullable(Int8), k3 Nullable(Int8), k4 Nullable(Int8)))
-2	([(NULL,33,NULL),(NULL,NULL,44),(NULL,55,66)])	Tuple(k1 Nested(k2 Nullable(Int8), k3 Nullable(Int8), k4 Nullable(Int8)))
+1	([(11,NULL,NULL),(NULL,22,NULL)])	Tuple(\n    k1 Nested(k2 Nullable(Int8), k3 Nullable(Int8), k4 Nullable(Int8)))
+2	([(NULL,33,NULL),(NULL,NULL,44),(NULL,55,66)])	Tuple(\n    k1 Nested(k2 Nullable(Int8), k3 Nullable(Int8), k4 Nullable(Int8)))
 1	[11,NULL]	[NULL,22]	[NULL,NULL]
 2	[NULL,NULL,NULL]	[33,NULL,55]	[NULL,44,66]
diff --git a/tests/queries/0_stateless/01825_type_json_parallel_insert.reference b/tests/queries/0_stateless/01825_type_json_parallel_insert.reference
index 158d61d46f7..e93e0aeb956 100644
--- a/tests/queries/0_stateless/01825_type_json_parallel_insert.reference
+++ b/tests/queries/0_stateless/01825_type_json_parallel_insert.reference
@@ -1 +1 @@
-Tuple(k1 Int8, k2 String)	500000
+Tuple(\n    k1 Int8,\n    k2 String)	500000
diff --git a/tests/queries/0_stateless/01825_type_json_schema_inference.reference b/tests/queries/0_stateless/01825_type_json_schema_inference.reference
index a1dd269f9b4..72e3b58b8a8 100644
--- a/tests/queries/0_stateless/01825_type_json_schema_inference.reference
+++ b/tests/queries/0_stateless/01825_type_json_schema_inference.reference
@@ -1,5 +1,5 @@
 {"id":"1","obj":{"k1":1,"k2":{"k3":"2","k4":[{"k5":3,"k6":null},{"k5":4,"k6":null}]},"some":null},"s":"foo"}
 {"id":"2","obj":{"k1":null,"k2":{"k3":"str","k4":[{"k5":null,"k6":55}]},"some":42},"s":"bar"}
-Tuple(k1 Nullable(Int8), k2 Tuple(k3 Nullable(String), k4 Nested(k5 Nullable(Int8), k6 Nullable(Int8))), some Nullable(Int8))
+Tuple(\n    k1 Nullable(Int8),\n    k2 Tuple(\n        k3 Nullable(String),\n        k4 Nested(k5 Nullable(Int8), k6 Nullable(Int8))),\n    some Nullable(Int8))
 {"id":"1","obj":"aaa","s":"foo"}
 {"id":"2","obj":"bbb","s":"bar"}
diff --git a/tests/queries/0_stateless/02149_external_schema_inference.reference b/tests/queries/0_stateless/02149_external_schema_inference.reference
index ebc30e874da..194c8ca62cb 100644
--- a/tests/queries/0_stateless/02149_external_schema_inference.reference
+++ b/tests/queries/0_stateless/02149_external_schema_inference.reference
@@ -31,8 +31,8 @@ lotteryWin	Float64
 someRatio	Float32					
 temperature	Float32					
 randomBigNumber	Int64					
-measureUnits	Array(Tuple(unit String, coef Float32))					
-nestiness_a_b_c	Tuple(d UInt32, e Array(UInt32))					
+measureUnits	Array(Tuple(\n    unit String,\n    coef Float32))					
+nestiness_a_b_c	Tuple(\n    d UInt32,\n    e Array(UInt32))					
 
 location	Array(Int32)					
 pi	Float32					
@@ -78,8 +78,8 @@ lotteryWin	String
 someRatio	String					
 temperature	String					
 randomBigNumber	String					
-measureUnits	Tuple(unit Array(String), coef Array(String))					
-nestiness_a_b_c	Tuple(d String, e Array(String))					
+measureUnits	Tuple(\n    unit Array(String),\n    coef Array(String))					
+nestiness_a_b_c	Tuple(\n    d String,\n    e Array(String))					
 
 uuid	String					
 name	String					
@@ -101,14 +101,14 @@ lotteryWin	Float64
 someRatio	Float32					
 temperature	Float32					
 randomBigNumber	Int64					
-measureunits	Tuple(coef Array(Float32), unit Array(String))					
-nestiness_a_b_c	Tuple(d UInt32, e Array(UInt32))					
+measureunits	Tuple(\n    coef Array(Float32),\n    unit Array(String))					
+nestiness_a_b_c	Tuple(\n    d UInt32,\n    e Array(UInt32))					
 newFieldStr	String					
 newFieldInt	Int32					
 newBool	UInt8					
 
 identifier	String					
-modules	Array(Tuple(module_id UInt32, supply UInt32, temp UInt32, nodes Array(Tuple(node_id UInt32, opening_time UInt32, closing_time UInt32, current UInt32, coords_y Float32))))					
+modules	Array(Tuple(\n    module_id UInt32,\n    supply UInt32,\n    temp UInt32,\n    nodes Array(Tuple(\n        node_id UInt32,\n        opening_time UInt32,\n        closing_time UInt32,\n        current UInt32,\n        coords_y Float32))))					
 
 Capnproto
 
@@ -123,15 +123,15 @@ lc2	Nullable(String)
 lc3	Array(Nullable(String))					
 
 value	UInt64					
-nested	Tuple(a Tuple(b UInt64, c Array(Array(UInt64))), d Array(Tuple(e Array(Array(Tuple(f UInt64, g UInt64))), h Array(Tuple(k Array(UInt64))))))					
+nested	Tuple(\n    a Tuple(\n        b UInt64,\n        c Array(Array(UInt64))),\n    d Array(Tuple(\n        e Array(Array(Tuple(\n            f UInt64,\n            g UInt64))),\n        h Array(Tuple(\n            k Array(UInt64))))))					
 
-nested	Tuple(value Array(UInt64), array Array(Array(UInt64)), tuple Array(Tuple(one UInt64, two UInt64)))					
+nested	Tuple(\n    value Array(UInt64),\n    array Array(Array(UInt64)),\n    tuple Array(Tuple(\n        one UInt64,\n        two UInt64)))					
 
-a	Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))					
+a	Tuple(\n    b UInt64,\n    c Tuple(\n        d UInt64,\n        e Tuple(\n            f UInt64)))					
 
 nullable	Nullable(UInt64)					
 array	Array(Nullable(UInt64))					
-tuple	Tuple(nullable Nullable(UInt64))					
+tuple	Tuple(\n    nullable Nullable(UInt64))					
 
 int8	Int8					
 uint8	UInt8					
@@ -151,8 +151,8 @@ datetime	UInt32
 datetime64	Int64					
 
 value	UInt64					
-tuple1	Tuple(one UInt64, two Tuple(three UInt64, four UInt64))					
-tuple2	Tuple(nested1 Tuple(nested2 Tuple(x UInt64)))					
+tuple1	Tuple(\n    one UInt64,\n    two Tuple(\n        three UInt64,\n        four UInt64))					
+tuple2	Tuple(\n    nested1 Tuple(\n        nested2 Tuple(\n            x UInt64)))					
 
 RawBLOB
 
diff --git a/tests/queries/0_stateless/02149_schema_inference.reference b/tests/queries/0_stateless/02149_schema_inference.reference
index 6d70c4682f5..ca634ac1701 100644
--- a/tests/queries/0_stateless/02149_schema_inference.reference
+++ b/tests/queries/0_stateless/02149_schema_inference.reference
@@ -37,30 +37,30 @@ d	Array(Nullable(Int64))
 JSONCompactEachRow
 c1	Nullable(Float64)					
 c2	Array(Tuple(Nullable(Int64), Nullable(String)))					
-c3	Tuple(key Nullable(Int64), key2 Nullable(Int64))					
+c3	Tuple(\n    key Nullable(Int64),\n    key2 Nullable(Int64))					
 c4	Nullable(Bool)					
 42.42	[(1,'String'),(2,'abcd')]	(42,24)	true
 c1	Nullable(Int64)					
 c2	Array(Tuple(Nullable(Int64), Nullable(String)))					
-c3	Tuple(key1 Nullable(Int64), key2 Nullable(Int64))					
+c3	Tuple(\n    key1 Nullable(Int64),\n    key2 Nullable(Int64))					
 c4	Nullable(Bool)					
 \N	[(1,'String'),(2,NULL)]	(NULL,24)	\N
 32	[(2,'String 2'),(3,'hello')]	(4242,2424)	true
 JSONCompactEachRowWithNames
 a	Nullable(Float64)					
 b	Array(Tuple(Nullable(Int64), Nullable(String)))					
-c	Tuple(key Nullable(Int64), key2 Nullable(Int64))					
+c	Tuple(\n    key Nullable(Int64),\n    key2 Nullable(Int64))					
 d	Nullable(Bool)					
 42.42	[(1,'String'),(2,'abcd')]	(42,24)	true
 JSONEachRow
 a	Nullable(Float64)					
 b	Array(Tuple(Nullable(Int64), Nullable(String)))					
-c	Tuple(key Nullable(Int64), key2 Nullable(Int64))					
+c	Tuple(\n    key Nullable(Int64),\n    key2 Nullable(Int64))					
 d	Nullable(Bool)					
 42.42	[(1,'String'),(2,'abcd')]	(42,24)	true
 a	Nullable(Int64)					
 b	Array(Tuple(Nullable(Int64), Nullable(String)))					
-c	Tuple(key1 Nullable(Int64), key2 Nullable(Int64))					
+c	Tuple(\n    key1 Nullable(Int64),\n    key2 Nullable(Int64))					
 d	Nullable(Bool)					
 \N	[(1,'String'),(2,NULL)]	(NULL,24)	\N
 32	[(2,'String 2'),(3,'hello')]	(4242,2424)	true
diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema_1.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema_1.reference
index 4e020427ad0..ee83ed63dc1 100644
--- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema_1.reference
+++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema_1.reference
@@ -24,12 +24,12 @@ fixed_string	Nullable(FixedString(3))
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(UInt64))					
-tuple	Tuple(`1` Nullable(UInt64), `2` Nullable(String))					
+tuple	Tuple(\n    `1` Nullable(UInt64),\n    `2` Nullable(String))					
 map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64))))					
-nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8))					
+nested1	Array(Tuple(\n    `1` Array(Nullable(UInt64)),\n    `2` Map(String, Nullable(UInt64))))					
+nested2	Tuple(\n    `1` Tuple(\n        `1` Array(Array(Nullable(UInt64))),\n        `2` Map(UInt64, Array(Tuple(\n            `1` Nullable(UInt64),\n            `2` Nullable(String))))),\n    `2` Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 ArrowStream
@@ -58,12 +58,12 @@ fixed_string	Nullable(FixedString(3))
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(UInt64))					
-tuple	Tuple(`1` Nullable(UInt64), `2` Nullable(String))					
+tuple	Tuple(\n    `1` Nullable(UInt64),\n    `2` Nullable(String))					
 map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64))))					
-nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8))					
+nested1	Array(Tuple(\n    `1` Array(Nullable(UInt64)),\n    `2` Map(String, Nullable(UInt64))))					
+nested2	Tuple(\n    `1` Tuple(\n        `1` Array(Array(Nullable(UInt64))),\n        `2` Map(UInt64, Array(Tuple(\n            `1` Nullable(UInt64),\n            `2` Nullable(String))))),\n    `2` Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 Parquet
@@ -92,12 +92,12 @@ fixed_string	Nullable(FixedString(3))
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(UInt64))					
-tuple	Tuple(`1` Nullable(UInt64), `2` Nullable(String))					
+tuple	Tuple(\n    `1` Nullable(UInt64),\n    `2` Nullable(String))					
 map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64))))					
-nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8))					
+nested1	Array(Tuple(\n    `1` Array(Nullable(UInt64)),\n    `2` Map(String, Nullable(UInt64))))					
+nested2	Tuple(\n    `1` Tuple(\n        `1` Array(Array(Nullable(UInt64))),\n        `2` Map(UInt64, Array(Tuple(\n            `1` Nullable(UInt64),\n            `2` Nullable(String))))),\n    `2` Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 ORC
@@ -126,12 +126,12 @@ fixed_string	Nullable(String)
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(Int64))					
-tuple	Tuple(`1` Nullable(Int64), `2` Nullable(String))					
+tuple	Tuple(\n    `1` Nullable(Int64),\n    `2` Nullable(String))					
 map	Map(String, Nullable(Int64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`1` Array(Nullable(Int64)), `2` Map(String, Nullable(Int64))))					
-nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(Int64))), `2` Map(Int64, Array(Tuple(`1` Nullable(Int64), `2` Nullable(String))))), `2` Nullable(Int8))					
+nested1	Array(Tuple(\n    `1` Array(Nullable(Int64)),\n    `2` Map(String, Nullable(Int64))))					
+nested2	Tuple(\n    `1` Tuple(\n        `1` Array(Array(Nullable(Int64))),\n        `2` Map(Int64, Array(Tuple(\n            `1` Nullable(Int64),\n            `2` Nullable(String))))),\n    `2` Nullable(Int8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 Native
diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
index 2ecce985eb4..cd39bf8879b 100644
--- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
+++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
@@ -2,7 +2,7 @@ Arrow
 x	Nullable(UInt64)					
 arr1	Array(Nullable(UInt64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64)))					
+arr3	Array(Tuple(\n    `1` Nullable(String),\n    `2` Nullable(UInt64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
@@ -12,7 +12,7 @@ ArrowStream
 x	Nullable(UInt64)					
 arr1	Array(Nullable(UInt64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64)))					
+arr3	Array(Tuple(\n    `1` Nullable(String),\n    `2` Nullable(UInt64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
@@ -22,7 +22,7 @@ Parquet
 x	Nullable(UInt64)					
 arr1	Array(Nullable(UInt64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64)))					
+arr3	Array(Tuple(\n    `1` Nullable(String),\n    `2` Nullable(UInt64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
@@ -32,7 +32,7 @@ ORC
 x	Nullable(Int64)					
 arr1	Array(Nullable(Int64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(Int64)))					
+arr3	Array(Tuple(\n    `1` Nullable(String),\n    `2` Nullable(Int64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
diff --git a/tests/queries/0_stateless/02246_flatten_tuple.reference b/tests/queries/0_stateless/02246_flatten_tuple.reference
index 0320150025d..ad0ca1fa03a 100644
--- a/tests/queries/0_stateless/02246_flatten_tuple.reference
+++ b/tests/queries/0_stateless/02246_flatten_tuple.reference
@@ -1,4 +1,4 @@
-([1,2],['a','b'],3,'c',4)	Tuple(`t1.a` Array(UInt32), `t1.s` Array(String), b UInt32, `t2.k` String, `t2.v` UInt32)
-Tuple(id Int8, obj Tuple(k1 Int8, k2 Tuple(k3 String, k4 Nested(k5 Int8, k6 Int8)), some Int8), s String)	Tuple(id Int8, `obj.k1` Int8, `obj.k2.k3` String, `obj.k2.k4.k5` Array(Int8), `obj.k2.k4.k6` Array(Int8), `obj.some` Int8, s String)
+([1,2],['a','b'],3,'c',4)	Tuple(\n    `t1.a` Array(UInt32),\n    `t1.s` Array(String),\n    b UInt32,\n    `t2.k` String,\n    `t2.v` UInt32)
+Tuple(\n    id Int8,\n    obj Tuple(\n        k1 Int8,\n        k2 Tuple(\n            k3 String,\n            k4 Nested(k5 Int8, k6 Int8)),\n        some Int8),\n    s String)	Tuple(\n    id Int8,\n    `obj.k1` Int8,\n    `obj.k2.k3` String,\n    `obj.k2.k4.k5` Array(Int8),\n    `obj.k2.k4.k6` Array(Int8),\n    `obj.some` Int8,\n    s String)
 1	1	2	[3,4]	[0,0]	0	foo
 2	0	str	[0]	[55]	42	bar
diff --git a/tests/queries/0_stateless/02287_type_object_convert.reference b/tests/queries/0_stateless/02287_type_object_convert.reference
index 2df54dcbcbc..501536f1f3e 100644
--- a/tests/queries/0_stateless/02287_type_object_convert.reference
+++ b/tests/queries/0_stateless/02287_type_object_convert.reference
@@ -1,15 +1,15 @@
-1	(1)	Tuple(x Nullable(Int8))
-1	(1,NULL)	Tuple(x Nullable(Int8), y Nullable(Int8))
-2	(NULL,2)	Tuple(x Nullable(Int8), y Nullable(Int8))
-1	(1,NULL)	Tuple(x Nullable(Int8), y Nullable(Int8))
-2	(NULL,2)	Tuple(x Nullable(Int8), y Nullable(Int8))
-3	(1,2)	Tuple(x Nullable(Int8), y Nullable(Int8))
+1	(1)	Tuple(\n    x Nullable(Int8))
+1	(1,NULL)	Tuple(\n    x Nullable(Int8),\n    y Nullable(Int8))
+2	(NULL,2)	Tuple(\n    x Nullable(Int8),\n    y Nullable(Int8))
+1	(1,NULL)	Tuple(\n    x Nullable(Int8),\n    y Nullable(Int8))
+2	(NULL,2)	Tuple(\n    x Nullable(Int8),\n    y Nullable(Int8))
+3	(1,2)	Tuple(\n    x Nullable(Int8),\n    y Nullable(Int8))
 1	1	\N
 2	\N	2
 3	1	2
-1	(1)	Tuple(x Int8)
-1	(1,0)	Tuple(x Int8, y Int8)
-2	(0,2)	Tuple(x Int8, y Int8)
+1	(1)	Tuple(\n    x Int8)
+1	(1,0)	Tuple(\n    x Int8,\n    y Int8)
+2	(0,2)	Tuple(\n    x Int8,\n    y Int8)
 {"x":1}
 {"x":1}
 {"x":[[1],[1,2]]}
diff --git a/tests/queries/0_stateless/02313_avro_records_and_maps.reference b/tests/queries/0_stateless/02313_avro_records_and_maps.reference
index 24fc635cdce..329462a4dda 100644
--- a/tests/queries/0_stateless/02313_avro_records_and_maps.reference
+++ b/tests/queries/0_stateless/02313_avro_records_and_maps.reference
@@ -1,8 +1,8 @@
-t	Tuple(a Int32, b String)					
+t	Tuple(\n    a Int32,\n    b String)					
 (0,'String')
 (1,'String')
 (2,'String')
-t	Tuple(a Int32, b Tuple(c Int32, d Int32), e Array(Int32))					
+t	Tuple(\n    a Int32,\n    b Tuple(\n        c Int32,\n        d Int32),\n    e Array(Int32))					
 (0,(1,2),[])
 (1,(2,3),[0])
 (2,(3,4),[0,1])
@@ -11,7 +11,7 @@ a.c	Array(Int32)
 [0,1]	[2,3]
 [1,2]	[3,4]
 [2,3]	[4,5]
-a.b	Array(Array(Tuple(c Int32, d Int32)))					
+a.b	Array(Array(Tuple(\n    c Int32,\n    d Int32)))					
 [[(0,1),(2,3)]]
 [[(1,2),(3,4)]]
 [[(2,3),(4,5)]]
@@ -19,7 +19,7 @@ m	Map(String, Int64)
 {'key_0':0}
 {'key_1':1}
 {'key_2':2}
-m	Map(String, Tuple(`1` Int64, `2` Array(Int64)))					
+m	Map(String, Tuple(\n    `1` Int64,\n    `2` Array(Int64)))					
 {'key_0':(0,[])}
 {'key_1':(1,[0])}
 {'key_2':(2,[0,1])}
diff --git a/tests/queries/0_stateless/02314_avro_null_as_default.reference b/tests/queries/0_stateless/02314_avro_null_as_default.reference
index ba38a15f924..e5d1b1c3752 100644
--- a/tests/queries/0_stateless/02314_avro_null_as_default.reference
+++ b/tests/queries/0_stateless/02314_avro_null_as_default.reference
@@ -1,5 +1,5 @@
 a	Nullable(Int64)					
-b	Array(Tuple(c Nullable(Int64), d Nullable(String)))					
+b	Array(Tuple(\n    c Nullable(Int64),\n    d Nullable(String)))					
 1	[(100,'Q'),(200,'W')]
 0
 0
diff --git a/tests/queries/0_stateless/02325_dates_schema_inference.reference b/tests/queries/0_stateless/02325_dates_schema_inference.reference
index a37360dae62..c8eebd3262e 100644
--- a/tests/queries/0_stateless/02325_dates_schema_inference.reference
+++ b/tests/queries/0_stateless/02325_dates_schema_inference.reference
@@ -5,14 +5,14 @@ x	Nullable(DateTime64(9))
 x	Array(Nullable(Date))					
 x	Array(Nullable(DateTime64(9)))					
 x	Array(Nullable(DateTime64(9)))					
-x	Tuple(date1 Nullable(DateTime64(9)), date2 Nullable(Date))					
+x	Tuple(\n    date1 Nullable(DateTime64(9)),\n    date2 Nullable(Date))					
 x	Array(Nullable(DateTime64(9)))					
 x	Array(Nullable(DateTime64(9)))					
 x	Nullable(DateTime64(9))					
 x	Array(Nullable(String))					
 x	Nullable(String)					
 x	Array(Nullable(String))					
-x	Tuple(key1 Array(Array(Nullable(DateTime64(9)))), key2 Array(Array(Nullable(String))))					
+x	Tuple(\n    key1 Array(Array(Nullable(DateTime64(9)))),\n    key2 Array(Array(Nullable(String))))					
 CSV
 c1	Nullable(Date)					
 c1	Nullable(DateTime64(9))					
diff --git a/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.reference b/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.reference
index a0e0f8f6b5e..d190476a7da 100644
--- a/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.reference
+++ b/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.reference
@@ -1,12 +1,12 @@
 JSONEachRow
 x	Nullable(Int64)					
 x	Array(Nullable(Int64))					
-x	Tuple(a Array(Nullable(Int64)))					
-x	Tuple(a Array(Nullable(Int64)), b Array(Nullable(Int64)))					
+x	Tuple(\n    a Array(Nullable(Int64)))					
+x	Tuple(\n    a Array(Nullable(Int64)),\n    b Array(Nullable(Int64)))					
 x	Nullable(Float64)					
 x	Nullable(Float64)					
 x	Array(Nullable(Float64))					
-x	Tuple(a Array(Nullable(Int64)), b Array(Nullable(Float64)))					
+x	Tuple(\n    a Array(Nullable(Int64)),\n    b Array(Nullable(Float64)))					
 CSV
 c1	Nullable(Int64)					
 c1	Array(Nullable(Int64))					
diff --git a/tests/queries/0_stateless/02421_type_json_empty_parts.reference b/tests/queries/0_stateless/02421_type_json_empty_parts.reference
index f360b4b92cd..3c1d2aafec1 100644
--- a/tests/queries/0_stateless/02421_type_json_empty_parts.reference
+++ b/tests/queries/0_stateless/02421_type_json_empty_parts.reference
@@ -3,24 +3,24 @@ Collapsing
 0
 id	UInt64					
 s	Int8					
-data	Tuple(_dummy UInt8)					
+data	Tuple(\n    _dummy UInt8)					
 DELETE all
 2
 1
 id	UInt64					
-data	Tuple(k1 String, k2 String)					
+data	Tuple(\n    k1 String,\n    k2 String)					
 0
 0
 id	UInt64					
-data	Tuple(_dummy UInt8)					
+data	Tuple(\n    _dummy UInt8)					
 TTL
 1
 1
 id	UInt64					
 d	Date					
-data	Tuple(k1 String, k2 String)					
+data	Tuple(\n    k1 String,\n    k2 String)					
 0
 0
 id	UInt64					
 d	Date					
-data	Tuple(_dummy UInt8)					
+data	Tuple(\n    _dummy UInt8)					
diff --git a/tests/queries/0_stateless/02521_avro_union_null_nested.reference b/tests/queries/0_stateless/02521_avro_union_null_nested.reference
index e4818b4bcac..a3cb5ba4858 100644
--- a/tests/queries/0_stateless/02521_avro_union_null_nested.reference
+++ b/tests/queries/0_stateless/02521_avro_union_null_nested.reference
@@ -5,7 +5,7 @@ added_snapshot_id	Nullable(Int64)
 added_data_files_count	Nullable(Int32)					
 existing_data_files_count	Nullable(Int32)					
 deleted_data_files_count	Nullable(Int32)					
-partitions	Array(Tuple(contains_null Bool, contains_nan Nullable(Bool), lower_bound Nullable(String), upper_bound Nullable(String)))					
+partitions	Array(Tuple(\n    contains_null Bool,\n    contains_nan Nullable(Bool),\n    lower_bound Nullable(String),\n    upper_bound Nullable(String)))					
 added_rows_count	Nullable(Int64)					
 existing_rows_count	Nullable(Int64)					
 deleted_rows_count	Nullable(Int64)					
diff --git a/tests/queries/0_stateless/02522_avro_complicate_schema.reference b/tests/queries/0_stateless/02522_avro_complicate_schema.reference
index 55c0369020f..a885163d609 100644
--- a/tests/queries/0_stateless/02522_avro_complicate_schema.reference
+++ b/tests/queries/0_stateless/02522_avro_complicate_schema.reference
@@ -1,5 +1,5 @@
 status	Int32					
 snapshot_id	Nullable(Int64)					
-data_file	Tuple(file_path String, file_format String, partition Tuple(vendor_id Nullable(Int64)), record_count Int64, file_size_in_bytes Int64, block_size_in_bytes Int64, column_sizes Array(Tuple(key Int32, value Int64)), value_counts Array(Tuple(key Int32, value Int64)), null_value_counts Array(Tuple(key Int32, value Int64)), nan_value_counts Array(Tuple(key Int32, value Int64)), lower_bounds Array(Tuple(key Int32, value String)), upper_bounds Array(Tuple(key Int32, value String)), key_metadata Nullable(String), split_offsets Array(Int64), sort_order_id Nullable(Int32))					
+data_file	Tuple(\n    file_path String,\n    file_format String,\n    partition Tuple(\n        vendor_id Nullable(Int64)),\n    record_count Int64,\n    file_size_in_bytes Int64,\n    block_size_in_bytes Int64,\n    column_sizes Array(Tuple(\n        key Int32,\n        value Int64)),\n    value_counts Array(Tuple(\n        key Int32,\n        value Int64)),\n    null_value_counts Array(Tuple(\n        key Int32,\n        value Int64)),\n    nan_value_counts Array(Tuple(\n        key Int32,\n        value Int64)),\n    lower_bounds Array(Tuple(\n        key Int32,\n        value String)),\n    upper_bounds Array(Tuple(\n        key Int32,\n        value String)),\n    key_metadata Nullable(String),\n    split_offsets Array(Int64),\n    sort_order_id Nullable(Int32))					
 1	6850377589038341628	('file:/warehouse/nyc.db/taxis/data/vendor_id=1/00000-0-c070e655-dc44-43d2-a01a-484f107210cb-00001.parquet','PARQUET',(1),2,1565,67108864,[(1,87),(2,51),(3,51),(4,57),(5,51)],[(1,2),(2,2),(3,2),(4,2),(5,2)],[(1,0),(2,0),(3,0),(4,0),(5,0)],[(3,0),(4,0)],[(1,'\0\0\0\0\0\0\0'),(2,'�C\0\0\0\0\0'),(3,'ff�?'),(4,'�p=\nף.@'),(5,'N')],[(1,'\0\0\0\0\0\0\0'),(2,'�C\0\0\0\0\0'),(3,'ffA'),(4,'q=\nףE@'),(5,'Y')],NULL,[4],0)
 1	6850377589038341628	('file:/warehouse/nyc.db/taxis/data/vendor_id=2/00000-0-c070e655-dc44-43d2-a01a-484f107210cb-00002.parquet','PARQUET',(2),2,1620,67108864,[(1,87),(2,51),(3,51),(4,57),(5,89)],[(1,2),(2,2),(3,2),(4,2),(5,2)],[(1,0),(2,0),(3,0),(4,0),(5,0)],[(3,0),(4,0)],[(1,'\0\0\0\0\0\0\0'),(2,'�C\0\0\0\0\0'),(3,'fff?'),(4,'��Q�"@'),(5,'N')],[(1,'\0\0\0\0\0\0\0'),(2,'�C\0\0\0\0\0'),(3,'\0\0 @'),(4,'fffff&6@'),(5,'N')],NULL,[4],0)
diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.reference b/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
index 864cd780ddb..31172c41262 100644
--- a/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
@@ -1,5 +1,5 @@
 a	Nullable(Int64)					
-obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+obj	Tuple(\n    f1 Nullable(Int64),\n    f2 Nullable(String),\n    f3 Nullable(Int64))					
 b	Nullable(Int64)					
 c	Nullable(String)					
 {"a":"1","obj":{"f1":"1","f2":"2020-01-01","f3":null},"b":null,"c":null}
@@ -10,11 +10,11 @@ UNION	data2.jsonl	b Nullable(Int64), obj Tuple(f2 Nullable(String), f3 Nullable(
 UNION	data3.jsonl	c Nullable(String)
 c	Nullable(String)					
 a	Nullable(Int64)					
-obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+obj	Tuple(\n    f1 Nullable(Int64),\n    f2 Nullable(String),\n    f3 Nullable(Int64))					
 b	Nullable(Int64)					
 c	Nullable(String)					
 a	Nullable(Int64)					
-obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+obj	Tuple(\n    f1 Nullable(Int64),\n    f2 Nullable(String),\n    f3 Nullable(Int64))					
 b	Nullable(Int64)					
 c	Nullable(String)					
 {"a":"1","obj":{"f1":"1","f2":"2020-01-01","f3":null},"b":null,"c":null}
@@ -25,7 +25,7 @@ UNION	archive.tar::data2.jsonl	b Nullable(Int64), obj Tuple(f2 Nullable(String),
 UNION	archive.tar::data3.jsonl	c Nullable(String)
 c	Nullable(String)					
 a	Nullable(Int64)					
-obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+obj	Tuple(\n    f1 Nullable(Int64),\n    f2 Nullable(String),\n    f3 Nullable(Int64))					
 b	Nullable(Int64)					
 c	Nullable(String)					
 1
diff --git a/tests/queries/0_stateless/02906_orc_tuple_field_prune.reference b/tests/queries/0_stateless/02906_orc_tuple_field_prune.reference
index dfdd38f5e8e..46738c95cd5 100644
--- a/tests/queries/0_stateless/02906_orc_tuple_field_prune.reference
+++ b/tests/queries/0_stateless/02906_orc_tuple_field_prune.reference
@@ -1,9 +1,9 @@
 int64_column	Nullable(Int64)					
 string_column	Nullable(String)					
 float64_column	Nullable(Float64)					
-tuple_column	Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))					
-array_tuple_column	Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))					
-map_tuple_column	Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))					
+tuple_column	Tuple(\n    a Nullable(String),\n    b Nullable(Float64),\n    c Nullable(Int64))					
+array_tuple_column	Array(Tuple(\n    a Nullable(String),\n    b Nullable(Float64),\n    c Nullable(Int64)))					
+map_tuple_column	Map(String, Tuple(\n    a Nullable(String),\n    b Nullable(Float64),\n    c Nullable(Int64)))					
 -- { echoOn }
 -- Test primitive types
 select int64_column, string_column, float64_column from file('02906.orc') where int64_column % 15 = 0;

From 9c465965566019c1184a07579be7764c049ea91f Mon Sep 17 00:00:00 2001
From: Mathieu Rey <matrey@gmail.com>
Date: Fri, 5 Jan 2024 14:07:51 +0800
Subject: [PATCH 168/204] Fix example 3 and tweak formatting

* example 3 is about having several SQL queries in the same input, so the example now illustrates that
* removed the sql marker for all results except example 2 to emphasize when you would get colorized output in the terminal
---
 .../operations/utilities/clickhouse-format.md | 28 +++++++++----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/docs/en/operations/utilities/clickhouse-format.md b/docs/en/operations/utilities/clickhouse-format.md
index 101310cc65e..3e4295598aa 100644
--- a/docs/en/operations/utilities/clickhouse-format.md
+++ b/docs/en/operations/utilities/clickhouse-format.md
@@ -27,7 +27,7 @@ $ clickhouse-format --query "select number from numbers(10) where number%2 order
 
 Result:
 
-```sql
+```bash
 SELECT number
 FROM numbers(10)
 WHERE number % 2
@@ -49,22 +49,20 @@ SELECT sum(number) FROM numbers(5)
 3. Multiqueries:
 
 ```bash
-$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
+$ clickhouse-format -n <<< "SELECT min(number) FROM numbers(5); SELECT max(number) FROM numbers(5);"
 ```
 
 Result:
 
-```sql
-SELECT *
-FROM
-(
-    SELECT 1 AS x
-    UNION ALL
-    SELECT 1
-    UNION DISTINCT
-    SELECT 3
-)
+```
+SELECT min(number)
+FROM numbers(5)
 ;
+
+SELECT max(number)
+FROM numbers(5)
+;
+
 ```
 
 4. Obfuscating:
@@ -75,7 +73,7 @@ $ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWE
 
 Result:
 
-```sql
+```
 SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
 ```
 
@@ -87,7 +85,7 @@ $ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWE
 
 Result:
 
-```sql
+```
 SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
 ```
 
@@ -99,7 +97,7 @@ $ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELE
 
 Result:
 
-```sql
+```
 SELECT * \
 FROM  \
 ( \

From e2944890f2ed887d99b9018eaf35c01f2724d154 Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Fri, 5 Jan 2024 10:28:08 +0000
Subject: [PATCH 169/204] Use master commit

---
 contrib/NuRaft | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/NuRaft b/contrib/NuRaft
index 636e83c33b2..2f5f52c4d8c 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 636e83c33b2243d88935d8bf78022f225f315154
+Subproject commit 2f5f52c4d8c87c2a3a3d101ca3a0194c9b77526f

From 15b66031bc1610f6324b7e9e1c51c8ff5c732661 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 5 Jan 2024 10:53:07 +0000
Subject: [PATCH 170/204] Update prepared sets.

---
 src/Interpreters/ActionsVisitor.cpp |  2 +-
 src/Interpreters/DatabaseCatalog.h  |  6 ++---
 src/Interpreters/PreparedSets.cpp   | 32 ++++++++++++++++++++------
 src/Interpreters/PreparedSets.h     | 35 +++++++++++++++++------------
 src/Storages/MergeTree/RPNBuilder.h | 11 ---------
 src/Storages/VirtualColumnUtils.cpp | 14 +-----------
 6 files changed, 51 insertions(+), 49 deletions(-)

diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp
index 827914eaefe..1789cc6c4b1 100644
--- a/src/Interpreters/ActionsVisitor.cpp
+++ b/src/Interpreters/ActionsVisitor.cpp
@@ -1419,7 +1419,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool
             return set;
         }
 
-        FutureSetPtr external_table_set;
+        FutureSetFromSubqueryPtr external_table_set;
 
         /// A special case is if the name of the table is specified on the right side of the IN statement,
         ///  and the table has the type Set (a previously prepared set).
diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h
index 6d8fd84557c..19882b0b828 100644
--- a/src/Interpreters/DatabaseCatalog.h
+++ b/src/Interpreters/DatabaseCatalog.h
@@ -82,8 +82,8 @@ private:
 
 using DDLGuardPtr = std::unique_ptr<DDLGuard>;
 
-class FutureSet;
-using FutureSetPtr = std::shared_ptr<FutureSet>;
+class FutureSetFromSubquery;
+using FutureSetFromSubqueryPtr = std::shared_ptr<FutureSetFromSubquery>;
 
 /// Creates temporary table in `_temporary_and_external_tables` with randomly generated unique StorageID.
 /// Such table can be accessed from everywhere by its ID.
@@ -116,7 +116,7 @@ struct TemporaryTableHolder : boost::noncopyable, WithContext
 
     IDatabase * temporary_tables = nullptr;
     UUID id = UUIDHelpers::Nil;
-    FutureSetPtr future_set;
+    FutureSetFromSubqueryPtr future_set;
 };
 
 ///TODO maybe remove shared_ptr from here?
diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp
index 18a25482b7f..cc3db726f01 100644
--- a/src/Interpreters/PreparedSets.cpp
+++ b/src/Interpreters/PreparedSets.cpp
@@ -97,7 +97,7 @@ FutureSetFromSubquery::FutureSetFromSubquery(
     String key,
     std::unique_ptr<QueryPlan> source_,
     StoragePtr external_table_,
-    FutureSetPtr external_table_set_,
+    std::shared_ptr<FutureSetFromSubquery> external_table_set_,
     const Settings & settings,
     bool in_subquery_)
     : external_table(std::move(external_table_))
@@ -168,6 +168,24 @@ std::unique_ptr<QueryPlan> FutureSetFromSubquery::build(const ContextPtr & conte
     return plan;
 }
 
+void FutureSetFromSubquery::buildSetInplace(const ContextPtr & context)
+{
+    if (external_table_set)
+        external_table_set->buildSetInplace(context);
+
+    auto plan = build(context);
+
+    if (!plan)
+        return;
+
+    auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
+    auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
+    pipeline.complete(std::make_shared<EmptySink>(Block()));
+
+    CompletedPipelineExecutor executor(pipeline);
+    executor.execute();
+}
+
 SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context)
 {
     if (!context->getSettingsRef().use_index_for_in_with_subqueries)
@@ -233,7 +251,7 @@ String PreparedSets::toString(const PreparedSets::Hash & key, const DataTypes &
     return buf.str();
 }
 
-FutureSetPtr PreparedSets::addFromTuple(const Hash & key, Block block, const Settings & settings)
+FutureSetFromTuplePtr PreparedSets::addFromTuple(const Hash & key, Block block, const Settings & settings)
 {
     auto from_tuple = std::make_shared<FutureSetFromTuple>(std::move(block), settings);
     const auto & set_types = from_tuple->getTypes();
@@ -247,7 +265,7 @@ FutureSetPtr PreparedSets::addFromTuple(const Hash & key, Block block, const Set
     return from_tuple;
 }
 
-FutureSetPtr PreparedSets::addFromStorage(const Hash & key, SetPtr set_)
+FutureSetFromStoragePtr PreparedSets::addFromStorage(const Hash & key, SetPtr set_)
 {
     auto from_storage = std::make_shared<FutureSetFromStorage>(std::move(set_));
     auto [it, inserted] = sets_from_storage.emplace(key, from_storage);
@@ -258,11 +276,11 @@ FutureSetPtr PreparedSets::addFromStorage(const Hash & key, SetPtr set_)
     return from_storage;
 }
 
-FutureSetPtr PreparedSets::addFromSubquery(
+FutureSetFromSubqueryPtr PreparedSets::addFromSubquery(
     const Hash & key,
     std::unique_ptr<QueryPlan> source,
     StoragePtr external_table,
-    FutureSetPtr external_table_set,
+    FutureSetFromSubqueryPtr external_table_set,
     const Settings & settings,
     bool in_subquery)
 {
@@ -282,7 +300,7 @@ FutureSetPtr PreparedSets::addFromSubquery(
     return from_subquery;
 }
 
-FutureSetPtr PreparedSets::addFromSubquery(
+FutureSetFromSubqueryPtr PreparedSets::addFromSubquery(
     const Hash & key,
     QueryTreeNodePtr query_tree,
     const Settings & settings)
@@ -300,7 +318,7 @@ FutureSetPtr PreparedSets::addFromSubquery(
     return from_subquery;
 }
 
-FutureSetPtr PreparedSets::findTuple(const Hash & key, const DataTypes & types) const
+FutureSetFromTuplePtr PreparedSets::findTuple(const Hash & key, const DataTypes & types) const
 {
     auto it = sets_from_tuple.find(key);
     if (it == sets_from_tuple.end())
diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h
index 9f8bac9f71c..7178cff73b9 100644
--- a/src/Interpreters/PreparedSets.h
+++ b/src/Interpreters/PreparedSets.h
@@ -69,6 +69,8 @@ private:
     SetPtr set;
 };
 
+using FutureSetFromStoragePtr = std::shared_ptr<FutureSetFromStorage>;
+
 /// Set from tuple is filled as well as set from storage.
 /// Additionally, it can be converted to set useful for PK.
 class FutureSetFromTuple final : public FutureSet
@@ -86,6 +88,8 @@ private:
     SetKeyColumns set_key_columns;
 };
 
+using FutureSetFromTuplePtr = std::shared_ptr<FutureSetFromTuple>;
+
 /// Set from subquery can be built inplace for PK or in CreatingSet step.
 /// If use_index_for_in_with_subqueries_max_values is reached, set for PK won't be created,
 /// but ordinary set would be created instead.
@@ -96,7 +100,7 @@ public:
         String key,
         std::unique_ptr<QueryPlan> source_,
         StoragePtr external_table_,
-        FutureSetPtr external_table_set_,
+        std::shared_ptr<FutureSetFromSubquery> external_table_set_,
         const Settings & settings,
         bool in_subquery_);
 
@@ -110,6 +114,7 @@ public:
     SetPtr buildOrderedSetInplace(const ContextPtr & context) override;
 
     std::unique_ptr<QueryPlan> build(const ContextPtr & context);
+    void buildSetInplace(const ContextPtr & context);
 
     QueryTreeNodePtr detachQueryTree() { return std::move(query_tree); }
     void setQueryPlan(std::unique_ptr<QueryPlan> source_);
@@ -119,7 +124,7 @@ public:
 private:
     SetAndKeyPtr set_and_key;
     StoragePtr external_table;
-    FutureSetPtr external_table_set;
+    std::shared_ptr<FutureSetFromSubquery> external_table_set;
 
     std::unique_ptr<QueryPlan> source;
     QueryTreeNodePtr query_tree;
@@ -130,6 +135,8 @@ private:
                               //       with new analyzer it's not a case
 };
 
+using FutureSetFromSubqueryPtr = std::shared_ptr<FutureSetFromSubquery>;
+
 /// Container for all the sets used in query.
 class PreparedSets
 {
@@ -141,32 +148,32 @@ public:
         UInt64 operator()(const Hash & key) const { return key.low64 ^ key.high64; }
     };
 
-    using SetsFromTuple = std::unordered_map<Hash, std::vector<std::shared_ptr<FutureSetFromTuple>>, Hashing>;
-    using SetsFromStorage = std::unordered_map<Hash, std::shared_ptr<FutureSetFromStorage>, Hashing>;
-    using SetsFromSubqueries = std::unordered_map<Hash, std::shared_ptr<FutureSetFromSubquery>, Hashing>;
+    using SetsFromTuple = std::unordered_map<Hash, std::vector<FutureSetFromTuplePtr>, Hashing>;
+    using SetsFromStorage = std::unordered_map<Hash, FutureSetFromStoragePtr, Hashing>;
+    using SetsFromSubqueries = std::unordered_map<Hash, FutureSetFromSubqueryPtr, Hashing>;
 
-    FutureSetPtr addFromStorage(const Hash & key, SetPtr set_);
-    FutureSetPtr addFromTuple(const Hash & key, Block block, const Settings & settings);
+    FutureSetFromStoragePtr addFromStorage(const Hash & key, SetPtr set_);
+    FutureSetFromTuplePtr addFromTuple(const Hash & key, Block block, const Settings & settings);
 
-    FutureSetPtr addFromSubquery(
+    FutureSetFromSubqueryPtr addFromSubquery(
         const Hash & key,
         std::unique_ptr<QueryPlan> source,
         StoragePtr external_table,
-        FutureSetPtr external_table_set,
+        FutureSetFromSubqueryPtr external_table_set,
         const Settings & settings,
         bool in_subquery = false);
 
-    FutureSetPtr addFromSubquery(
+    FutureSetFromSubqueryPtr addFromSubquery(
         const Hash & key,
         QueryTreeNodePtr query_tree,
         const Settings & settings);
 
-    FutureSetPtr findTuple(const Hash & key, const DataTypes & types) const;
-    std::shared_ptr<FutureSetFromStorage> findStorage(const Hash & key) const;
-    std::shared_ptr<FutureSetFromSubquery> findSubquery(const Hash & key) const;
+    FutureSetFromTuplePtr findTuple(const Hash & key, const DataTypes & types) const;
+    FutureSetFromStoragePtr findStorage(const Hash & key) const;
+    FutureSetFromSubqueryPtr findSubquery(const Hash & key) const;
     void markAsINSubquery(const Hash & key);
 
-    using Subqueries = std::vector<std::shared_ptr<FutureSetFromSubquery>>;
+    using Subqueries = std::vector<FutureSetFromSubqueryPtr>;
     Subqueries getSubqueries() const;
     bool hasSubqueries() const { return !sets_from_subqueries.empty(); }
 
diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h
index f14f241cac8..b0755ccd3ca 100644
--- a/src/Storages/MergeTree/RPNBuilder.h
+++ b/src/Storages/MergeTree/RPNBuilder.h
@@ -202,17 +202,6 @@ public:
         traverseTree(RPNBuilderTreeNode(filter_actions_dag_node, tree_context));
     }
 
-    RPNBuilder(const ASTPtr & filter_node,
-        ContextPtr query_context_,
-        Block block_with_constants_,
-        PreparedSetsPtr prepared_sets_,
-        const ExtractAtomFromTreeFunction & extract_atom_from_tree_function_)
-        : tree_context(std::move(query_context_), std::move(block_with_constants_), std::move(prepared_sets_))
-        , extract_atom_from_tree_function(extract_atom_from_tree_function_)
-    {
-        traverseTree(RPNBuilderTreeNode(filter_node.get(), tree_context));
-    }
-
     RPNElements && extractRPN() && { return std::move(rpn_elements); }
 
 private:
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index aed06fb0540..85d8c18261b 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -250,19 +250,7 @@ static void makeSets(const ExpressionActionsPtr & actions, const ContextPtr & co
                 if (!future_set->get())
                 {
                     if (auto * set_from_subquery = typeid_cast<FutureSetFromSubquery *>(future_set.get()))
-                    {
-                        auto plan = set_from_subquery->build(context);
-
-                        if (!plan)
-                            continue;
-
-                        auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
-                        auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
-                        pipeline.complete(std::make_shared<EmptySink>(Block()));
-
-                        CompletedPipelineExecutor executor(pipeline);
-                        executor.execute();
-                    }
+                        set_from_subquery->buildSetInplace(context);
                 }
             }
         }

From 8bcfa888a53a3f8e7764cd834cf0165d530b59f9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 5 Jan 2024 11:50:09 +0000
Subject: [PATCH 171/204] Fixing index hint

---
 src/Storages/MergeTree/MergeTreeData.cpp               |  8 +++++---
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp |  4 ++--
 src/Storages/System/StorageSystemTables.cpp            |  2 +-
 src/Storages/VirtualColumnUtils.cpp                    | 10 +++++-----
 src/Storages/VirtualColumnUtils.h                      |  2 +-
 5 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 0a9dd49c621..d3220591738 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1082,13 +1082,15 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
     auto metadata_snapshot = getInMemoryMetadataPtr();
     Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */);
 
+    auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr);
+
     // Generate valid expressions for filtering
     bool valid = true;
-    for (const auto * input : filter_actions_dag->getInputs())
+    for (const auto * input : filter_dag->getInputs())
         if (!virtual_columns_block.has(input->result_name))
             valid = false;
 
-    PartitionPruner partition_pruner(metadata_snapshot, filter_actions_dag, local_context, true /* strict */);
+    PartitionPruner partition_pruner(metadata_snapshot, filter_dag, local_context, true /* strict */);
     if (partition_pruner.isUseless() && !valid)
         return {};
 
@@ -1096,7 +1098,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
     if (valid)
     {
         virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */);
-        VirtualColumnUtils::filterBlockWithDAG(filter_actions_dag, virtual_columns_block, local_context);
+        VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context);
         part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
         if (part_values.empty())
             return 0;
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 9c7e6933630..d5b9b4423a9 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -784,7 +784,7 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(
         = {ColumnWithTypeAndName(part_offset_type->createColumn(), part_offset_type, "_part_offset"),
            ColumnWithTypeAndName(part_type->createColumn(), part_type, "_part")};
 
-    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), sample);
+    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample);
     if (!dag)
         return;
 
@@ -810,7 +810,7 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
     if (!filter_dag)
         return {};
     auto sample = data.getSampleBlockWithVirtualColumns();
-    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), sample);
+    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample);
     if (!dag)
         return {};
 
diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp
index 53b28543bf1..d2c01ec3dea 100644
--- a/src/Storages/System/StorageSystemTables.cpp
+++ b/src/Storages/System/StorageSystemTables.cpp
@@ -104,7 +104,7 @@ ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr
     MutableColumnPtr database_column = ColumnString::create();
     MutableColumnPtr engine_column;
 
-    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, sample);
+    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample);
     if (dag)
     {
         bool filter_by_engine = false;
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index 46564e0642a..e845e03d122 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -394,7 +394,7 @@ ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, con
     }
 
     block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
-    return splitFilterDagForAllowedInputs(predicate, block);
+    return splitFilterDagForAllowedInputs(predicate, &block);
 }
 
 ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
@@ -468,7 +468,7 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo
 
 static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
     const ActionsDAG::Node * node,
-    const Block & allowed_inputs,
+    const Block * allowed_inputs,
     ActionsDAG::Nodes & additional_nodes)
 {
     if (node->type == ActionsDAG::ActionType::FUNCTION)
@@ -543,13 +543,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
         }
     }
 
-    if (!canEvaluateSubtree(node, allowed_inputs))
+    if (allowed_inputs && !canEvaluateSubtree(node, *allowed_inputs))
         return nullptr;
 
     return node;
 }
 
-ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block & allowed_inputs)
+ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs)
 {
     if (!predicate)
         return nullptr;
@@ -564,7 +564,7 @@ ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate,
 
 void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context)
 {
-    auto dag = splitFilterDagForAllowedInputs(predicate, block);
+    auto dag = splitFilterDagForAllowedInputs(predicate, &block);
     if (dag)
         filterBlockWithDAG(dag, block, context);
 }
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
index 3c07e33a177..7a9b2605339 100644
--- a/src/Storages/VirtualColumnUtils.h
+++ b/src/Storages/VirtualColumnUtils.h
@@ -42,7 +42,7 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block,
 void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context);
 
 /// Extract a part of predicate that can be evaluated using only columns from input_names.
-ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block & allowed_inputs);
+ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs);
 
 /// Extract from the input stream a set of `name` column values
 template <typename T>

From 2cc2a9089c9d06bb5f7cc6b8e2bbedc11bac2fc1 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Fri, 5 Jan 2024 12:18:21 +0000
Subject: [PATCH 172/204] Incorporate review feedback

---
 .../functions/string-functions.md             |  18 +-
 src/Functions/idna.cpp                        | 202 ++++++++++++
 src/Functions/punycode.cpp                    | 308 +++---------------
 .../queries/0_stateless/02932_idna.reference  |  61 ++--
 tests/queries/0_stateless/02932_idna.sql      | 157 ++++-----
 .../0_stateless/02932_punycode.reference      |  83 ++---
 tests/queries/0_stateless/02932_punycode.sql  |  90 +++--
 7 files changed, 448 insertions(+), 471 deletions(-)
 create mode 100644 src/Functions/idna.cpp

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 668592a6fd9..3461ab28bf9 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -1386,7 +1386,7 @@ Result:
 ## punycodeEncode
 
 Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) representation of a string.
-The string must be UTF8-encoded, otherwise an exception is thrown.
+The string must be UTF8-encoded, otherwise the behavior is undefined.
 
 **Syntax**
 
@@ -1416,10 +1416,6 @@ Result:
 └───────────────────────────┘
 ```
 
-## punycodeEncodeOrNull
-
-Like `punycodeEncode` but returns `NULL` in case of an error instead of throwing an exception.
-
 ## punycodeDecode
 
 Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
@@ -1448,19 +1444,19 @@ select punycodeDecode('Mnchen-3ya');
 Result:
 
 ```result
-┌─punycodeEncode('Mnchen-3ya')─┐
+┌─punycodeDecode('Mnchen-3ya')─┐
 │ München                      │
 └──────────────────────────────┘
 ```
 
-## punycodeDecodeOrNull
+## tryPunycodeDecode
 
-Like `punycodeDecode` but returns `NULL` in case of an error instead of throwing an exception.
+Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded string is given.
 
 ## idnaEncode
 
 Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
-The string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown.
+The input string must be UTF8-encoded and translatable to an ASCII string, otherwise an exception is thrown.
 Note: No percent decoding or trimming of tabs, spaces or control characters is performed.
 
 **Syntax**
@@ -1491,9 +1487,9 @@ Result:
 └─────────────────────────────────────┘
 ```
 
-## idnaEncodeOrNull
+## tryIdnaEncode
 
-Like `idnaEncode` but returns `NULL` in case of an error instead of throwing an exception.
+Like `idnaEncode` but returns an empty string in case of an error instead of throwing an exception.
 
 ## idnaDecode
 
diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp
new file mode 100644
index 00000000000..087131021c9
--- /dev/null
+++ b/src/Functions/idna.cpp
@@ -0,0 +1,202 @@
+#include "config.h"
+
+#if USE_IDNA
+
+#include <Columns/ColumnString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringToString.h>
+
+#ifdef __clang__
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wnewline-eof"
+#endif
+#    include <ada/idna/to_ascii.h>
+#    include <ada/idna/to_unicode.h>
+#    include <ada/idna/unicode_transcoding.h>
+#ifdef __clang__
+#    pragma clang diagnostic pop
+#endif
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+    extern const int NOT_IMPLEMENTED;
+}
+
+/// Implementation of
+/// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
+///   and https://www.unicode.org/reports/tr46/#ToUnicode
+
+
+enum class ErrorHandling
+{
+    Throw,  /// Throw exception
+    Empty   /// Return empty string
+};
+
+
+/// Translates a UTF-8 string (typically an Internationalized Domain Name for Applications, IDNA) to an ASCII-encoded equivalent. The
+/// encoding is performed per domain component and based on Punycode with ASCII Compatible Encoding (ACE) prefix "xn--".
+/// Example: "straße.münchen.de" --> "xn--strae-oqa.xn--mnchen-3ya.de"
+/// Note: doesn't do percent decoding. Doesn't trim tabs, spaces or control characters. Expects non-empty inputs.
+template <ErrorHandling error_handling>
+struct IdnaEncode
+{
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        const size_t rows = offsets.size();
+        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
+        res_offsets.reserve(rows);
+
+        size_t prev_offset = 0;
+        std::string ascii;
+        for (size_t row = 0; row < rows; ++row)
+        {
+            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
+            const size_t value_length = offsets[row] - prev_offset - 1;
+
+            std::string_view value_view(value, value_length);
+            if (!value_view.empty()) /// to_ascii() expects non-empty input
+            {
+                ascii = ada::idna::to_ascii(value_view);
+                const bool ok = !ascii.empty();
+                if (!ok)
+                {
+                    if constexpr (error_handling == ErrorHandling::Throw)
+                    {
+                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to Punycode", std::string_view(value, value_length));
+                    }
+                    else
+                    {
+                        static_assert(error_handling == ErrorHandling::Empty);
+                        ascii.clear();
+                    }
+                }
+            }
+
+            res_data.insert(ascii.c_str(), ascii.c_str() + ascii.size() + 1);
+            res_offsets.push_back(res_data.size());
+
+            prev_offset = offsets[row];
+
+            ascii.clear();
+        }
+    }
+
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
+    }
+};
+
+struct IdnaDecode
+{
+    /// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        const size_t rows = offsets.size();
+        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
+        res_offsets.reserve(rows);
+
+        size_t prev_offset = 0;
+        std::string unicode;
+        for (size_t row = 0; row < rows; ++row)
+        {
+            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
+            const size_t value_length = offsets[row] - prev_offset - 1;
+            std::string_view value_view(value, value_length);
+
+            unicode = ada::idna::to_unicode(value_view);
+
+            res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
+            res_offsets.push_back(res_data.size());
+
+            prev_offset = offsets[row];
+
+            unicode.clear();
+        }
+    }
+
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
+    }
+};
+
+struct NameIdnaEncode { static constexpr auto name = "idnaEncode"; };
+struct NameTryIdnaEncode { static constexpr auto name = "tryIdnaEncode"; };
+struct NameIdnaDecode { static constexpr auto name = "idnaDecode"; };
+
+using FunctionIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Throw>, NameIdnaEncode>;
+using FunctionTryIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Empty>, NameTryIdnaEncode>;
+using FunctionIdnaDecode = FunctionStringToString<IdnaDecode, NameIdnaDecode>;
+
+REGISTER_FUNCTION(Idna) /// Registers idnaEncode(), tryIdnaEncode() and idnaDecode() together with their user-facing documentation.
+{
+    factory.registerFunction<FunctionIdnaEncode>(FunctionDocumentation{ /// IdnaEncode<ErrorHandling::Throw>
+        .description=R"(
+Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
+        .syntax="idnaEncode(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT idnaEncode('straße.münchen.de') AS ascii;",
+            R"(
+┌─ascii───────────────────────────┐
+│ xn--strae-oqa.xn--mnchen-3ya.de │
+└─────────────────────────────────┘
+            )"
+            }}
+    });
+
+    factory.registerFunction<FunctionTryIdnaEncode>(FunctionDocumentation{ /// IdnaEncode<ErrorHandling::Empty>
+        .description=R"(
+Computes an ASCII representation of an Internationalized Domain Name. Returns an empty string in case of error.)",
+        .syntax="tryIdnaEncode(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT tryIdnaEncode('straße.münchen.de') AS ascii;",
+            R"(
+┌─ascii───────────────────────────┐
+│ xn--strae-oqa.xn--mnchen-3ya.de │
+└─────────────────────────────────┘
+            )"
+            }}
+    });
+
+    factory.registerFunction<FunctionIdnaDecode>(FunctionDocumentation{ /// IdnaDecode has no error-handling variant
+        .description=R"(
+Computes a Unicode representation of an Internationalized Domain Name.)",
+        .syntax="idnaDecode(str)",
+        .arguments={{"str", "Input string"}},
+        .returned_value="A Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .examples={
+            {"simple",
+            "SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de') AS unicode;",
+            R"(
+┌─unicode───────────┐
+│ straße.münchen.de │
+└───────────────────┘
+            )"
+            }}
+    });
+}
+
+}
+
+#endif
+
diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp
index d015b6212c8..66e5ac6cb7b 100644
--- a/src/Functions/punycode.cpp
+++ b/src/Functions/punycode.cpp
@@ -2,21 +2,15 @@
 
 #if USE_IDNA
 
-#include <Columns/ColumnNullable.h>
 #include <Columns/ColumnString.h>
-#include <DataTypes/DataTypeNullable.h>
-#include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionHelpers.h>
-#include <Functions/IFunction.h>
+#include <Functions/FunctionStringToString.h>
 
 #ifdef __clang__
 #    pragma clang diagnostic push
 #    pragma clang diagnostic ignored "-Wnewline-eof"
 #endif
 #    include <ada/idna/punycode.h>
-#    include <ada/idna/to_ascii.h>
-#    include <ada/idna/to_unicode.h>
 #    include <ada/idna/unicode_transcoding.h>
 #ifdef __clang__
 #    pragma clang diagnostic pop
@@ -28,93 +22,31 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
-    extern const int ILLEGAL_COLUMN;
+    extern const int NOT_IMPLEMENTED;
 }
 
-namespace
-{
+/// Implementation of
+/// - punycodeEncode(), punycodeDecode() and tryPunycodeDecode(), see https://en.wikipedia.org/wiki/Punycode
 
 enum class ErrorHandling
 {
-    Throw,
-    Null
-};
-
-/// Implementation of
-/// - punycode(En|De)code[OrNull](), see [1]
-/// - idna(En|De)code[OrNull](), see [2, 3]
-///
-/// [1] https://en.wikipedia.org/wiki/Punycode
-/// [2] https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
-/// [3] https://www.unicode.org/reports/tr46/#ToUnicode
-
-
-/// Kind of similar to FunctionStringToString but accepts String arguments only, and supports `OrNull` overloads
-template <typename Impl>
-class FunctionIdnaPunycodeBase : public IFunction
-{
-public:
-    static constexpr auto name = Impl::name;
-
-    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionIdnaPunycodeBase<Impl>>(); }
-    String getName() const override { return name; }
-    size_t getNumberOfArguments() const override { return 1; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-    bool useDefaultImplementationForConstants() const override { return true; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        FunctionArgumentDescriptors args{
-            {"str", &isString<IDataType>, nullptr, "String"},
-        };
-        validateFunctionArgumentTypes(*this, arguments, args);
-
-        auto return_type = std::make_shared<DataTypeString>();
-
-        if constexpr (Impl::error_handling == ErrorHandling::Null)
-            return makeNullable(return_type);
-        else
-            return return_type;
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
-    {
-        auto col_res = ColumnString::create();
-        ColumnUInt8::MutablePtr col_res_null;
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
-            Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_res_null);
-        else
-            throw Exception(
-                ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal column {} of argument of function {}",
-                arguments[0].column->getName(), getName());
-
-        if constexpr (Impl::error_handling == ErrorHandling::Null)
-            return ColumnNullable::create(std::move(col_res), std::move(col_res_null));
-        else
-            return col_res;
-    }
+    Throw,  /// Throw exception
+    Empty   /// Return empty string
 };
 
 
-template <ErrorHandling error_handling_>
-struct PunycodeEncodeImpl
+struct PunycodeEncode
 {
-    static constexpr auto error_handling = error_handling_;
-    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "punycodeEncodeOrNull" : "punycodeEncode";
-
+    /// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out.
     static void vector(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
         ColumnString::Chars & res_data,
-        ColumnString::Offsets & res_offsets,
-        ColumnUInt8::MutablePtr & col_res_null)
+        ColumnString::Offsets & res_offsets)
     {
         const size_t rows = offsets.size();
         res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
         res_offsets.reserve(rows);
-        if constexpr (error_handling == ErrorHandling::Null)
-            col_res_null = ColumnUInt8::create(rows, 0);
 
         size_t prev_offset = 0;
         std::u32string value_utf32;
@@ -126,21 +58,13 @@ struct PunycodeEncodeImpl
 
             const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length);
             value_utf32.resize(value_utf32_length);
-            ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
+            const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
+            if (codepoints == 0)
+                value_utf32.clear(); /// input was empty or it is not valid UTF-8
 
             const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
             if (!ok)
-            {
-                if constexpr (error_handling == ErrorHandling::Throw)
-                {
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to Punycode", std::string_view(value, value_length));
-                }
-                else
-                {
-                    value_puny.clear();
-                    col_res_null->getData()[row] = 1;
-                }
-            }
+                value_puny.clear();
 
             res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
             res_offsets.push_back(res_data.size());
@@ -151,27 +75,26 @@ struct PunycodeEncodeImpl
             value_puny.clear(); /// utf32_to_punycode() appends to its output string
         }
     }
+
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
+    }
 };
 
 
-template <ErrorHandling error_handling_>
-struct PunycodeDecodeImpl
+template <ErrorHandling error_handling>
+struct PunycodeDecode
 {
-    static constexpr auto error_handling = error_handling_;
-    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "punycodeDecodeOrNull" : "punycodeDecode";
-
     static void vector(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
         ColumnString::Chars & res_data,
-        ColumnString::Offsets & res_offsets,
-        ColumnUInt8::MutablePtr & col_res_null)
+        ColumnString::Offsets & res_offsets)
     {
         const size_t rows = offsets.size();
         res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
         res_offsets.reserve(rows);
-        if constexpr (error_handling == ErrorHandling::Null)
-            col_res_null = ColumnUInt8::create(rows, 0);
 
         size_t prev_offset = 0;
         std::u32string value_utf32;
@@ -191,8 +114,8 @@ struct PunycodeDecodeImpl
                 }
                 else
                 {
+                    static_assert(error_handling == ErrorHandling::Empty);
                     value_utf32.clear();
-                    col_res_null->getData()[row] = 1;
                 }
             }
 
@@ -209,115 +132,26 @@ struct PunycodeDecodeImpl
             value_utf8.clear();
         }
     }
-};
 
-
-/// Translates a UTF-8 string (typically an Internationalized Domain Name for Applications, IDNA) to an ASCII-encoded equivalent. The
-/// encoding is performed per domain component and based on Punycode with ASCII Compatible Encoding (ACE) prefix "xn--".
-/// Example: "straße.münchen.de" --> "xn--strae-oqa.xn--mnchen-3ya.de"
-/// Note: doesn't do percent decoding. Doesn't trim tabs, spaces or control characters. Expects non-empty inputs.
-template <ErrorHandling error_handling_>
-struct IdnaEncodeImpl
-{
-    static constexpr auto error_handling = error_handling_;
-    static constexpr auto name = (error_handling == ErrorHandling::Null) ? "idnaEncodeOrNull" : "idnaEncode";
-
-    static void vector(
-        const ColumnString::Chars & data,
-        const ColumnString::Offsets & offsets,
-        ColumnString::Chars & res_data,
-        ColumnString::Offsets & res_offsets,
-        ColumnUInt8::MutablePtr & col_res_null)
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
     {
-        const size_t rows = offsets.size();
-        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
-        res_offsets.reserve(rows);
-        if constexpr (error_handling == ErrorHandling::Null)
-            col_res_null = ColumnUInt8::create(rows, 0);
-
-        size_t prev_offset = 0;
-        std::string ascii;
-        for (size_t row = 0; row < rows; ++row)
-        {
-            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
-            const size_t value_length = offsets[row] - prev_offset - 1;
-            std::string_view value_view(value, value_length);
-
-            if (!value_view.empty()) /// to_ascii() expects non-empty input
-            {
-                ascii = ada::idna::to_ascii(value_view);
-                const bool ok = !ascii.empty();
-                if (!ok)
-                {
-                    if constexpr (error_handling == ErrorHandling::Throw)
-                    {
-                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to Punycode", std::string_view(value, value_length));
-                    }
-                    else
-                    {
-                        ascii.clear();
-                        col_res_null->getData()[row] = 1;
-                    }
-                }
-            }
-
-            res_data.insert(ascii.c_str(), ascii.c_str() + ascii.size() + 1);
-            res_offsets.push_back(res_data.size());
-
-            prev_offset = offsets[row];
-
-            ascii.clear();
-        }
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
     }
 };
 
+struct NamePunycodeEncode { static constexpr auto name = "punycodeEncode"; };
+struct NamePunycodeDecode { static constexpr auto name = "punycodeDecode"; };
+struct NameTryPunycodeDecode { static constexpr auto name = "tryPunycodeDecode"; };
 
-/// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
-/// Therefore, this struct implements no error handling.
-struct IdnaDecodeImpl
-{
-    static constexpr auto error_handling = ErrorHandling::Throw; /// dummy
-    static constexpr auto name = "idnaDecode";
-
-    static void vector(
-        const ColumnString::Chars & data,
-        const ColumnString::Offsets & offsets,
-        ColumnString::Chars & res_data,
-        ColumnString::Offsets & res_offsets,
-        ColumnUInt8::MutablePtr & /*col_res_null*/)
-    {
-        const size_t rows = offsets.size();
-        res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
-        res_offsets.reserve(rows);
-
-        size_t prev_offset = 0;
-        std::string unicode;
-        for (size_t row = 0; row < rows; ++row)
-        {
-            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
-            const size_t value_length = offsets[row] - prev_offset - 1;
-            std::string_view value_view(value, value_length);
-
-            unicode = ada::idna::to_unicode(value_view);
-
-            res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
-            res_offsets.push_back(res_data.size());
-
-            prev_offset = offsets[row];
-
-            unicode.clear();
-        }
-    }
-};
-
-
-}
+using FunctionPunycodeEncode = FunctionStringToString<PunycodeEncode, NamePunycodeEncode>;
+using FunctionPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Throw>, NamePunycodeDecode>;
+using FunctionTryPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Empty>, NameTryPunycodeDecode>;
 
 REGISTER_FUNCTION(Punycode)
 {
-    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeEncodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycodeEncode>(FunctionDocumentation{
         .description=R"(
-Computes a Punycode representation of a string. Throws an exception in case of error.)",
+Computes a Punycode representation of a string.)",
         .syntax="punycodeEncode(str)",
         .arguments={{"str", "Input string"}},
         .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
@@ -332,26 +166,9 @@ Computes a Punycode representation of a string. Throws an exception in case of e
             }}
     });
 
-    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeEncodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
+    factory.registerFunction<FunctionPunycodeDecode>(FunctionDocumentation{
         .description=R"(
-Computes a Punycode representation of a string. Returns NULL in case of error)",
-        .syntax="punycodeEncode(str)",
-        .arguments={{"str", "Input string"}},
-        .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
-        .examples={
-            {"simple",
-            "SELECT punycodeEncodeOrNull('München') AS puny;",
-            R"(
-┌─puny───────┐
-│ Mnchen-3ya │
-└────────────┘
-            )"
-            }}
-    });
-
-    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeDecodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
-        .description=R"(
-Computes a Punycode representation of a string. Throws an exception in case of error.)",
+Computes a Punycode representation of a string. Throws an exception if the input is not valid Punycode.)",
         .syntax="punycodeDecode(str)",
         .arguments={{"str", "A Punycode-encoded string"}},
         .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
@@ -366,15 +183,15 @@ Computes a Punycode representation of a string. Throws an exception in case of e
             }}
     });
 
-    factory.registerFunction<FunctionIdnaPunycodeBase<PunycodeDecodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
+    factory.registerFunction<FunctionTryPunycodeDecode>(FunctionDocumentation{
         .description=R"(
-Computes a Punycode representation of a string. Returns NULL in case of error)",
+Computes a Punycode representation of a string. Returns an empty string if the input is not valid Punycode.)",
         .syntax="punycodeDecode(str)",
         .arguments={{"str", "A Punycode-encoded string"}},
         .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
         .examples={
             {"simple",
-            "SELECT punycodeDecodeOrNull('Mnchen-3ya') AS plain;",
+            "SELECT tryPunycodeDecode('Mnchen-3ya') AS plain;",
             R"(
 ┌─plain───┐
 │ München │
@@ -382,57 +199,6 @@ Computes a Punycode representation of a string. Returns NULL in case of error)",
             )"
             }}
     });
-
-    factory.registerFunction<FunctionIdnaPunycodeBase<IdnaEncodeImpl<ErrorHandling::Throw>>>(FunctionDocumentation{
-        .description=R"(
-Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
-        .syntax="idnaEncode(str)",
-        .arguments={{"str", "Input string"}},
-        .returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
-        .examples={
-            {"simple",
-            "SELECT idnaEncode('straße.münchen.de') AS ascii;",
-            R"(
-┌─ascii───────────────────────────┐
-│ xn--strae-oqa.xn--mnchen-3ya.de │
-└─────────────────────────────────┘
-            )"
-            }}
-    });
-
-    factory.registerFunction<FunctionIdnaPunycodeBase<IdnaEncodeImpl<ErrorHandling::Null>>>(FunctionDocumentation{
-        .description=R"(
-Computes a ASCII representation of an Internationalized Domain Name. Returns NULL in case of error)",
-        .syntax="punycodeEncode(str)",
-        .arguments={{"str", "Input string"}},
-        .returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
-        .examples={
-            {"simple",
-            "SELECT idnaEncodeOrNull('München') AS ascii;",
-            R"(
-┌─ascii───────────────────────────┐
-│ xn--strae-oqa.xn--mnchen-3ya.de │
-└─────────────────────────────────┘
-            )"
-            }}
-    });
-
-    factory.registerFunction<FunctionIdnaPunycodeBase<IdnaDecodeImpl>>(FunctionDocumentation{
-        .description=R"(
-Computes a Unicode representation of an Internationalized Domain Name.)",
-        .syntax="idnaDecode(str)",
-        .arguments={{"str", "Input string"}},
-        .returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
-        .examples={
-            {"simple",
-            "SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de') AS unicode;",
-            R"(
-┌─unicode───────────┐
-│ straße.münchen.de │
-└───────────────────┘
-            )"
-            }}
-    });
 }
 
 }
diff --git a/tests/queries/0_stateless/02932_idna.reference b/tests/queries/0_stateless/02932_idna.reference
index 8bfba53ad11..0947194c07f 100644
--- a/tests/queries/0_stateless/02932_idna.reference
+++ b/tests/queries/0_stateless/02932_idna.reference
@@ -1,24 +1,24 @@
 -- Negative tests
 -- Regular cases
-straße.de	xn--strae-oqa.de	straße.de	xn--strae-oqa.de	straße.de
+straße.de	xn--strae-oqa.de	xn--strae-oqa.de	straße.de	straße.de
 2001:4860:4860::8888	2001:4860:4860::8888	2001:4860:4860::8888	2001:4860:4860::8888	2001:4860:4860::8888
 AMAZON	amazon	amazon	amazon	amazon
 aa--	aa--	aa--	aa--	aa--
-a†--	xn--a---kp0a	a†--	xn--a---kp0a	a†--
+a†--	xn--a---kp0a	xn--a---kp0a	a†--	a†--
 ab--c	ab--c	ab--c	ab--c	ab--c
--†	xn----xhn	-†	xn----xhn	-†
--x.xn--zca	-x.xn--zca	-x.ß	-x.xn--zca	-x.ß
-x-.xn--zca	x-.xn--zca	x-.ß	x-.xn--zca	x-.ß
-x-.ß	x-.xn--zca	x-.ß	x-.xn--zca	x-.ß
-x..ß	x..xn--zca	x..ß	x..xn--zca	x..ß
+-†	xn----xhn	xn----xhn	-†	-†
+-x.xn--zca	-x.xn--zca	-x.xn--zca	-x.ß	-x.ß
+x-.xn--zca	x-.xn--zca	x-.xn--zca	x-.ß	x-.ß
+x-.ß	x-.xn--zca	x-.xn--zca	x-.ß	x-.ß
+x..ß	x..xn--zca	x..xn--zca	x..ß	x..ß
 128.0,0.1	128.0,0.1	128.0,0.1	128.0,0.1	128.0,0.1
-xn--zca.xn--zca	xn--zca.xn--zca	ß.ß	xn--zca.xn--zca	ß.ß
-xn--zca.ß	xn--zca.xn--zca	ß.ß	xn--zca.xn--zca	ß.ß
+xn--zca.xn--zca	xn--zca.xn--zca	xn--zca.xn--zca	ß.ß	ß.ß
+xn--zca.ß	xn--zca.xn--zca	xn--zca.xn--zca	ß.ß	ß.ß
 x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x	x01234567890123456789012345678901234567890123456789012345678901x
-x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß
-x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß
+x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.ß
+x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.xn--zca	x01234567890123456789012345678901234567890123456789012345678901x.ß	x01234567890123456789012345678901234567890123456789012345678901x.ß
 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x	01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x
-≠	xn--1ch	≠	xn--1ch	≠
+≠	xn--1ch	xn--1ch	≠	≠
 aa--	aa--	aa--	aa--
 ab--c	ab--c	ab--c	ab--c
 -x	-x	-x	-x
@@ -54,22 +54,35 @@ xn--55qw42g.xn--55qw42g	公益.公益	xn--55qw42g.xn--55qw42g	xn--55qw42g.xn--55
 ≠	≠	xn--1ch	xn--1ch
 ファッション.biz	ファッション.biz	xn--bck1b9a5dre4c.biz	xn--bck1b9a5dre4c.biz
 -- Special cases
+---- Empty input
 
 
 
+---- NULL input
 \N
 \N
 \N
-\N
-\N
-\N
-\N
+---- Garbage inputs for idnaEncode
+
+
+
+
+---- Long input
+Row 1:
+──────
+idna:         Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+ascii:        wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk.
+ascii_try:    wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk.
+original:     wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen.
+original_try: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen.
+---- Non-const input
 				
-münchen	xn--mnchen-3ya	münchen	xn--mnchen-3ya	münchen
-straße.münchen.de	xn--strae-oqa.xn--mnchen-3ya.de	straße.münchen.de	xn--strae-oqa.xn--mnchen-3ya.de	straße.münchen.de
-london.co.uk	london.co.uk
-microsoft.com	microsoft.com
-straße.münchen.de	xn--strae-oqa.xn--mnchen-3ya.de
-xn--	\N
-xn--	\N
-xn--tešla	\N
+münchen	xn--mnchen-3ya	xn--mnchen-3ya	münchen	münchen
+straße.münchen.de	xn--strae-oqa.xn--mnchen-3ya.de	xn--strae-oqa.xn--mnchen-3ya.de	straße.münchen.de	straße.münchen.de
+---- Non-const input with invalid values sprinkled in
+london.co.uk	london.co.uk	london.co.uk
+microsoft.com	microsoft.com	microsoft.com
+xn--		
+xn--		
+xn--tešla		
+ytraße.münchen.de	xn--ytrae-oqa.xn--mnchen-3ya.de	ytraße.münchen.de
diff --git a/tests/queries/0_stateless/02932_idna.sql b/tests/queries/0_stateless/02932_idna.sql
index 3572d4a6aec..db7688064f2 100644
--- a/tests/queries/0_stateless/02932_idna.sql
+++ b/tests/queries/0_stateless/02932_idna.sql
@@ -6,118 +6,119 @@
 SELECT '-- Negative tests';
 
 SELECT idnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT idnaEncodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT tryIdnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT idnaDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 
 SELECT idnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT idnaEncodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT tryIdnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT idnaDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
 SELECT idnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT idnaEncodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT tryIdnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT idnaDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 
-SELECT idnaEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT idnaEncodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT idnaDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT idnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
+SELECT tryIdnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
+SELECT idnaDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
 
 SELECT '-- Regular cases';
 
 -- The test cases originate from the ada idna unit tests:
 -- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_alternating.txt
 -- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_unicode_alternating.txt
+--
+SELECT 'straße.de' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT '2001:4860:4860::8888' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'AMAZON' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'aa--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'a†--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'ab--c' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT '-†' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT '-x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'x-.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'x-.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'x..ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT '128.0,0.1' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'xn--zca.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'xn--zca.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'x01234567890123456789012345678901234567890123456789012345678901x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT '01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
+SELECT '≠' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try;
 
-SELECT 'straße.de' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT '2001:4860:4860::8888' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'AMAZON' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'aa--' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'a†--' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'ab--c' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT '-†' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT '-x.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'x-.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'x-.ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'x..ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT '128.0,0.1' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'xn--zca.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'xn--zca.ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'x01234567890123456789012345678901234567890123456789012345678901x' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.xn--zca' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.ß' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT '01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-SELECT '≠' AS idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull;
-
-SELECT 'aa--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'ab--c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT '-x' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--1ch' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--dqd20apc' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--gdh' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--80aaa0ahbbeh4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--3bs854c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--mgb9awbf' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--mgbaam7a8h' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--mgbbh1a71e' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--s7y.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--55qx5d.xn--tckwe' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--4dbrk0ce' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--zckzah' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--p1ai.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--mxahbxey0c.gr' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--d1acpjx3f.xn--p1ai' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--sterreich-z7a.at' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--h2breg3eve.xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'ejemplo.xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--9t4b11yi5a.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--gk3at1e.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--42c2d9a' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT '1xn--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--bih.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--mgbb9fbpob' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'xn--55qw42g.xn--55qw42g' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT '≠' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-SELECT 'ファッション.biz' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, idnaEncodeOrNull(unicode) AS originalOrNull;
-
+SELECT 'aa--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'ab--c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT '-x' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--1ch' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--dqd20apc' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--gdh' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--80aaa0ahbbeh4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--3bs854c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--mgb9awbf' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--mgbaam7a8h' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--mgbbh1a71e' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--s7y.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--55qx5d.xn--tckwe' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--4dbrk0ce' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--zckzah' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--p1ai.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--mxahbxey0c.gr' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--d1acpjx3f.xn--p1ai' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--sterreich-z7a.at' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--h2breg3eve.xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'ejemplo.xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--9t4b11yi5a.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--gk3at1e.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--42c2d9a' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT '1xn--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--bih.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--mgbb9fbpob' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'xn--55qw42g.xn--55qw42g' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT '≠' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+SELECT 'ファッション.biz' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try;
+--
 SELECT '-- Special cases';
 
+SELECT '---- Empty input';
 SELECT idnaEncode('');
-SELECT idnaEncodeOrNull('');
+SELECT tryIdnaEncode('');
 SELECT idnaDecode('');
 
+SELECT '---- NULL input';
 SELECT idnaEncode(NULL);
-SELECT idnaEncodeOrNull(NULL);
+SELECT tryIdnaEncode(NULL);
 SELECT idnaDecode(NULL);
 
--- garbage IDNA/unicode values, see
+SELECT '---- Garbage inputs for idnaEncode';
 -- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_invalid.txt
--- only idnaEncode() is tested, idnaDecode() has by definition no invalid input values
 SELECT idnaEncode('xn--'); -- { serverError BAD_ARGUMENTS }
-SELECT idnaEncodeOrNull('xn--');
+SELECT tryIdnaEncode('xn--');
 SELECT idnaEncode('ﻱa'); -- { serverError BAD_ARGUMENTS }
-SELECT idnaEncodeOrNull('ﻱa');
+SELECT tryIdnaEncode('ﻱa');
 SELECT idnaEncode('xn--a-yoc'); -- { serverError BAD_ARGUMENTS }
-SELECT idnaEncodeOrNull('xn--a-yoc');
+SELECT tryIdnaEncode('xn--a-yoc');
 SELECT idnaEncode('xn--tešla'); -- { serverError BAD_ARGUMENTS }
-SELECT idnaEncodeOrNull('xn--tešla');
+SELECT tryIdnaEncode('xn--tešla');
 
--- long input
--- SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS idna, idnaEncode(idna) AS ascii, idnaEncodeOrNull(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull FORMAT Vertical;
+SELECT '---- Long input';
+SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FORMAT Vertical;
 
--- non-const values
+SELECT '---- Non-const input';
 DROP TABLE IF EXISTS tab;
 CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna;
 INSERT INTO tab VALUES ('straße.münchen.de') ('') ('münchen');
-SELECT idna, idnaEncode(idna) AS ascii, idnaDecode(ascii) AS original, idnaEncodeOrNull(idna) AS asciiOrNull, idnaDecode(asciiOrNull) AS originalOrNull FROM tab;
+SELECT idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FROM tab;
 DROP TABLE tab;
 
--- non-const values with a few invalid values for testing the OrNull variants
+SELECT '---- Non-const input with invalid values sprinkled in';
 DROP TABLE IF EXISTS tab;
-CREATE TABLE tab (ascii String) ENGINE=MergeTree ORDER BY ascii;
-INSERT INTO tab VALUES ('xn--') ('london.co.uk') ('straße.münchen.de') ('xn--tešla') ('microsoft.com') ('xn--');
-SELECT ascii, idnaEncode(ascii) AS original FROM tab; -- { serverError BAD_ARGUMENTS }
-SELECT ascii, idnaEncodeOrNull(ascii) AS original FROM tab;
+CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna;
+INSERT INTO tab VALUES ('xn--') ('london.co.uk') ('ytraße.münchen.de') ('xn--tešla') ('microsoft.com') ('xn--');
+SELECT idna, idnaEncode(idna) AS ascii FROM tab; -- { serverError BAD_ARGUMENTS }
+SELECT idna, tryIdnaEncode(idna) AS ascii, idnaDecode(ascii) AS original FROM tab;
 DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference
index 76508525b19..ff05eaa72a3 100644
--- a/tests/queries/0_stateless/02932_punycode.reference
+++ b/tests/queries/0_stateless/02932_punycode.reference
@@ -1,52 +1,55 @@
 -- Negative tests
 -- Regular cases
-a	a-	a	a-	a
-A	A-	A	A-	A
---	---	--	---	--
-London	London-	London	London-	London
-Lloyd-Atkinson	Lloyd-Atkinson-	Lloyd-Atkinson	Lloyd-Atkinson-	Lloyd-Atkinson
-This has spaces	This has spaces-	This has spaces	This has spaces-	This has spaces
--> $1.00 <-	-> $1.00 <--	-> $1.00 <-	-> $1.00 <--	-> $1.00 <-
-а	80a	а	80a	а
-ü	tda	ü	tda	ü
-α	mxa	α	mxa	α
-例	fsq	例	fsq	例
-😉	n28h	😉	n28h	😉
-αβγ	mxacd	αβγ	mxacd	αβγ
-München	Mnchen-3ya	München	Mnchen-3ya	München
-Mnchen-3ya	Mnchen-3ya-	Mnchen-3ya	Mnchen-3ya-	Mnchen-3ya
-München-Ost	Mnchen-Ost-9db	München-Ost	Mnchen-Ost-9db	München-Ost
-Bahnhof München-Ost	Bahnhof Mnchen-Ost-u6b	Bahnhof München-Ost	Bahnhof Mnchen-Ost-u6b	Bahnhof München-Ost
-abæcdöef	abcdef-qua4k	abæcdöef	abcdef-qua4k	abæcdöef
-правда	80aafi6cg	правда	80aafi6cg	правда
-ยจฆฟคฏข	22cdfh1b8fsa	ยจฆฟคฏข	22cdfh1b8fsa	ยจฆฟคฏข
-ドメイン名例	eckwd4c7cu47r2wf	ドメイン名例	eckwd4c7cu47r2wf	ドメイン名例
-MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前
-「bücher」	bcher-kva8445foa	「bücher」	bcher-kva8445foa	「bücher」
-团淄	3bs854c	团淄	3bs854c	团淄
+a	a-	a	a
+A	A-	A	A
+--	---	--	--
+London	London-	London	London
+Lloyd-Atkinson	Lloyd-Atkinson-	Lloyd-Atkinson	Lloyd-Atkinson
+This has spaces	This has spaces-	This has spaces	This has spaces
+-> $1.00 <-	-> $1.00 <--	-> $1.00 <-	-> $1.00 <-
+а	80a	а	а
+ü	tda	ü	ü
+α	mxa	α	α
+例	fsq	例	例
+😉	n28h	😉	😉
+αβγ	mxacd	αβγ	αβγ
+München	Mnchen-3ya	München	München
+Mnchen-3ya	Mnchen-3ya-	Mnchen-3ya	Mnchen-3ya
+München-Ost	Mnchen-Ost-9db	München-Ost	München-Ost
+Bahnhof München-Ost	Bahnhof Mnchen-Ost-u6b	Bahnhof München-Ost	Bahnhof München-Ost
+abæcdöef	abcdef-qua4k	abæcdöef	abæcdöef
+правда	80aafi6cg	правда	правда
+ยจฆฟคฏข	22cdfh1b8fsa	ยจฆฟคฏข	ยจฆฟคฏข
+ドメイン名例	eckwd4c7cu47r2wf	ドメイン名例	ドメイン名例
+MajiでKoiする5秒前	MajiKoi5-783gue6qz075azm5e	MajiでKoiする5秒前	MajiでKoiする5秒前
+「bücher」	bcher-kva8445foa	「bücher」	「bücher」
+团淄	3bs854c	团淄	团淄
 -- Special cases
+---- Empty input
 
 
 
+---- NULL input
+\N
+\N
+\N
+---- Garbage Punycode-encoded input
 
-\N
-\N
-\N
-\N
-\N
+---- Long input
 Row 1:
 ──────
-str:            Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
-puny:           Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa
-original:       Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
-punyOrNull:     Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa
-originalOrNull: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
-München	Mnchen-3ya	München	Mnchen-3ya	München
-abc	abc-	abc	abc-	abc
-aäoöuü	aou-qla5gqb	aäoöuü	aou-qla5gqb	aäoöuü
-Also no punycode	\N
+str:          Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+puny:         Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa
+original:     Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+original_try: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.
+---- Non-const values
+München	Mnchen-3ya	München	München
+abc	abc-	abc	abc
+aäoöuü	aou-qla5gqb	aäoöuü	aäoöuü
+---- Non-const values with invalid values sprinkled in
+Also no punycode	
 London-	London
 Mnchen-3ya	München
-No punycode	\N
+No punycode	
 Rtting-3ya	Rütting
-XYZ no punycode	\N
+XYZ no punycode	
diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql
index 8df47cbf3da..b9bcf933641 100644
--- a/tests/queries/0_stateless/02932_punycode.sql
+++ b/tests/queries/0_stateless/02932_punycode.sql
@@ -6,85 +6,81 @@
 SELECT '-- Negative tests';
 
 SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT punycodeEncodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT punycodeDecodeOrNull(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT tryPunycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 
 SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT punycodeEncodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT punycodeDecodeOrNull(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT tryPunycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
 SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT punycodeEncodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT punycodeDecodeOrNull('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT tryPunycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 
-SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT punycodeEncodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT punycodeDecodeOrNull(toFixedString('two', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
+SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
+SELECT tryPunycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED }
 
 SELECT '-- Regular cases';
 
 -- The test cases originate from the ada idna unit tests:
 -- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt
 
-SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull;
-
+SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try;
+--
 SELECT '-- Special cases';
 
+SELECT '---- Empty input';
 SELECT punycodeEncode('');
-SELECT punycodeEncodeOrNull('');
 SELECT punycodeDecode('');
-SELECT punycodeDecodeOrNull('');
+SELECT tryPunycodeDecode('');
 
+SELECT '---- NULL input';
 SELECT punycodeEncode(NULL);
-SELECT punycodeEncodeOrNull(NULL);
 SELECT punycodeDecode(NULL);
-SELECT punycodeDecodeOrNull(NULL);
+SELECT tryPunycodeDecode(NULL);
 
--- garbage Punycode-encoded values
+SELECT '---- Garbage Punycode-encoded input';
 SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS }
-SELECT punycodeDecodeOrNull('no punycode');
+SELECT tryPunycodeDecode('no punycode');
 
--- long input
-SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) as punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull FORMAT Vertical;
+SELECT '---- Long input';
+SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FORMAT Vertical;
 
--- non-const values
+SELECT '---- Non-const values';
 DROP TABLE IF EXISTS tab;
 CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str;
 INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München');
-SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, punycodeEncodeOrNull(str) AS punyOrNull, punycodeDecodeOrNull(punyOrNull) AS originalOrNull FROM tab;
+SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FROM tab;
 DROP TABLE tab;
 
--- non-const values with a few invalid values for testing the OrNull variants
+SELECT '---- Non-const values with invalid values sprinkled in';
 DROP TABLE IF EXISTS tab;
 CREATE TABLE tab (puny String) ENGINE=MergeTree ORDER BY puny;
 INSERT INTO tab VALUES ('Also no punycode') ('London-') ('Mnchen-3ya') ('No punycode') ('Rtting-3ya') ('XYZ no punycode');
 SELECT puny, punycodeDecode(puny) AS original FROM tab; -- { serverError BAD_ARGUMENTS }
-SELECT puny, punycodeDecodeOrNull(puny) AS original FROM tab;
+SELECT puny, tryPunycodeDecode(puny) AS original FROM tab;
 DROP TABLE tab;

From c0a44ffce487be24d4767d93d45a31fb4a72a1a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 5 Jan 2024 13:46:18 +0100
Subject: [PATCH 173/204] Revert "Merging #53757"

---
 src/Core/Settings.h                      |  2 +-
 src/Interpreters/Aggregator.cpp          | 43 ++++++++++++------------
 src/Interpreters/JIT/compileFunction.cpp | 38 +++++----------------
 3 files changed, 31 insertions(+), 52 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index b41e7869fae..17f4213a2cc 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -157,7 +157,7 @@ class IColumn;
     M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
     M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
     M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
-    M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
+    M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
     M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
     M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
     M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index 07c52d50e18..cdc4292a79c 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -664,26 +664,26 @@ void Aggregator::compileAggregateFunctionsIfNeeded()
     for (size_t i = 0; i < aggregate_functions.size(); ++i)
     {
         const auto * function = aggregate_functions[i];
-        bool function_is_compilable = function->isCompilable();
-        if (!function_is_compilable)
-            continue;
-
         size_t offset_of_aggregate_function = offsets_of_aggregate_states[i];
-        AggregateFunctionWithOffset function_to_compile
+
+        if (function->isCompilable())
         {
-            .function = function,
-            .aggregate_data_offset = offset_of_aggregate_function
-        };
+            AggregateFunctionWithOffset function_to_compile
+            {
+                .function = function,
+                .aggregate_data_offset = offset_of_aggregate_function
+            };
 
-        functions_to_compile.emplace_back(std::move(function_to_compile));
+            functions_to_compile.emplace_back(std::move(function_to_compile));
 
-        functions_description += function->getDescription();
-        functions_description += ' ';
+            functions_description += function->getDescription();
+            functions_description += ' ';
 
-        functions_description += std::to_string(offset_of_aggregate_function);
-        functions_description += ' ';
+            functions_description += std::to_string(offset_of_aggregate_function);
+            functions_description += ' ';
+        }
 
-        is_aggregate_function_compiled[i] = true;
+        is_aggregate_function_compiled[i] = function->isCompilable();
     }
 
     if (functions_to_compile.empty())
@@ -1685,13 +1685,14 @@ bool Aggregator::executeOnBlock(Columns columns,
     /// For the case when there are no keys (all aggregate into one row).
     if (result.type == AggregatedDataVariants::Type::without_key)
     {
-#if USE_EMBEDDED_COMPILER
-        if (compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_functions_instructions.data()))
-        {
-            executeWithoutKeyImpl<true>(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool);
-        }
-        else
-#endif
+        /// TODO: Enable compilation after investigation
+// #if USE_EMBEDDED_COMPILER
+//         if (compiled_aggregate_functions_holder)
+//         {
+//             executeWithoutKeyImpl<true>(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool);
+//         }
+//         else
+// #endif
         {
             executeWithoutKeyImpl<false>(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool);
         }
diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp
index 1c6b324dad7..f50a122f9a2 100644
--- a/src/Interpreters/JIT/compileFunction.cpp
+++ b/src/Interpreters/JIT/compileFunction.cpp
@@ -67,8 +67,7 @@ static void compileFunction(llvm::Module & module, const IFunctionBase & functio
 {
     const auto & function_argument_types = function.getArgumentTypes();
 
-    auto & context = module.getContext();
-    llvm::IRBuilder<> b(context);
+    llvm::IRBuilder<> b(module.getContext());
     auto * size_type = b.getIntNTy(sizeof(size_t) * 8);
     auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy());
     auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, data_type->getPointerTo() }, /*isVarArg=*/false);
@@ -76,8 +75,6 @@ static void compileFunction(llvm::Module & module, const IFunctionBase & functio
     /// Create function in module
 
     auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, function.getName(), module);
-    func->setAttributes(llvm::AttributeList::get(context, {{2, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)}}));
-
     auto * args = func->args().begin();
     llvm::Value * rows_count_arg = args++;
     llvm::Value * columns_arg = args++;
@@ -199,9 +196,6 @@ static void compileCreateAggregateStatesFunctions(llvm::Module & module, const s
     auto * create_aggregate_states_function_type = llvm::FunctionType::get(b.getVoidTy(), { aggregate_data_places_type }, false);
     auto * create_aggregate_states_function = llvm::Function::Create(create_aggregate_states_function_type, llvm::Function::ExternalLinkage, name, module);
 
-    create_aggregate_states_function->setAttributes(
-        llvm::AttributeList::get(context, {{1, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)}}));
-
     auto * arguments = create_aggregate_states_function->args().begin();
     llvm::Value * aggregate_data_place_arg = arguments++;
 
@@ -247,11 +241,6 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module,
     auto * add_into_aggregate_states_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, size_type, column_type->getPointerTo(), places_type }, false);
     auto * add_into_aggregate_states_func = llvm::Function::Create(add_into_aggregate_states_func_declaration, llvm::Function::ExternalLinkage, name, module);
 
-    add_into_aggregate_states_func->setAttributes(llvm::AttributeList::get(
-        context,
-        {{3, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)},
-         {4, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)}}));
-
     auto * arguments = add_into_aggregate_states_func->args().begin();
     llvm::Value * row_start_arg = arguments++;
     llvm::Value * row_end_arg = arguments++;
@@ -307,7 +296,7 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module,
     llvm::Value * aggregation_place = nullptr;
 
     if (places_argument_type == AddIntoAggregateStatesPlacesArgumentType::MultiplePlaces)
-        aggregation_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), b.CreateInBoundsGEP(b.getInt8Ty()->getPointerTo(), places_arg, counter_phi));
+        aggregation_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), b.CreateGEP(b.getInt8Ty()->getPointerTo(), places_arg, counter_phi));
     else
         aggregation_place = places_arg;
 
@@ -324,7 +313,7 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module,
             auto & column = columns[previous_columns_size + column_argument_index];
             const auto & argument_type = arguments_types[column_argument_index];
 
-            auto * column_data_element = b.CreateLoad(column.data_element_type, b.CreateInBoundsGEP(column.data_element_type, column.data_ptr, counter_phi));
+            auto * column_data_element = b.CreateLoad(column.data_element_type, b.CreateGEP(column.data_element_type, column.data_ptr, counter_phi));
 
             if (!argument_type->isNullable())
             {
@@ -332,7 +321,7 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module,
                 continue;
             }
 
-            auto * column_null_data_with_offset = b.CreateInBoundsGEP(b.getInt8Ty(), column.null_data_ptr, counter_phi);
+            auto * column_null_data_with_offset = b.CreateGEP(b.getInt8Ty(), column.null_data_ptr, counter_phi);
             auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), column_null_data_with_offset), b.getInt8(0));
             auto * nullable_unitialized = llvm::Constant::getNullValue(toNullableType(b, column.data_element_type));
             auto * first_insert = b.CreateInsertValue(nullable_unitialized, column_data_element, {0});
@@ -365,8 +354,7 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module,
 
 static void compileMergeAggregatesStates(llvm::Module & module, const std::vector<AggregateFunctionWithOffset> & functions, const std::string & name)
 {
-    auto & context = module.getContext();
-    llvm::IRBuilder<> b(context);
+    llvm::IRBuilder<> b(module.getContext());
 
     auto * aggregate_data_place_type = b.getInt8Ty()->getPointerTo();
     auto * aggregate_data_places_type = aggregate_data_place_type->getPointerTo();
@@ -377,11 +365,6 @@ static void compileMergeAggregatesStates(llvm::Module & module, const std::vecto
     auto * merge_aggregates_states_func
         = llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module);
 
-    merge_aggregates_states_func->setAttributes(llvm::AttributeList::get(
-        context,
-        {{1, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)},
-         {2, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)}}));
-
     auto * arguments = merge_aggregates_states_func->args().begin();
     llvm::Value * aggregate_data_places_dst_arg = arguments++;
     llvm::Value * aggregate_data_places_src_arg = arguments++;
@@ -443,11 +426,6 @@ static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons
     auto * insert_aggregates_into_result_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, size_type, column_type->getPointerTo(), aggregate_data_places_type }, false);
     auto * insert_aggregates_into_result_func = llvm::Function::Create(insert_aggregates_into_result_func_declaration, llvm::Function::ExternalLinkage, name, module);
 
-    insert_aggregates_into_result_func->setAttributes(llvm::AttributeList::get(
-        context,
-        {{3, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)},
-         {4, llvm::Attribute::get(context, llvm::Attribute::AttrKind::NoAlias)}}));
-
     auto * arguments = insert_aggregates_into_result_func->args().begin();
     llvm::Value * row_start_arg = arguments++;
     llvm::Value * row_end_arg = arguments++;
@@ -482,7 +460,7 @@ static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons
     auto * counter_phi = b.CreatePHI(row_start_arg->getType(), 2);
     counter_phi->addIncoming(row_start_arg, entry);
 
-    auto * aggregate_data_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), b.CreateInBoundsGEP(b.getInt8Ty()->getPointerTo(), aggregate_data_places_arg, counter_phi));
+    auto * aggregate_data_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), b.CreateGEP(b.getInt8Ty()->getPointerTo(), aggregate_data_places_arg, counter_phi));
 
     for (size_t i = 0; i < functions.size(); ++i)
     {
@@ -492,11 +470,11 @@ static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons
         const auto * aggregate_function_ptr = functions[i].function;
         auto * final_value = aggregate_function_ptr->compileGetResult(b, aggregation_place_with_offset);
 
-        auto * result_column_data_element = b.CreateInBoundsGEP(columns[i].data_element_type, columns[i].data_ptr, counter_phi);
+        auto * result_column_data_element = b.CreateGEP(columns[i].data_element_type, columns[i].data_ptr, counter_phi);
         if (columns[i].null_data_ptr)
         {
             b.CreateStore(b.CreateExtractValue(final_value, {0}), result_column_data_element);
-            auto * result_column_is_null_element = b.CreateInBoundsGEP(b.getInt8Ty(), columns[i].null_data_ptr, counter_phi);
+            auto * result_column_is_null_element = b.CreateGEP(b.getInt8Ty(), columns[i].null_data_ptr, counter_phi);
             b.CreateStore(b.CreateSelect(b.CreateExtractValue(final_value, {1}), b.getInt8(1), b.getInt8(0)), result_column_is_null_element);
         }
         else

From 0f49e8c0f056ba507b7b702a35e422ef849945d9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 5 Jan 2024 12:38:03 +0000
Subject: [PATCH 174/204] Fixing tests.

---
 ...cal_expressions_optimizer_low_cardinality.reference |  6 +++---
 ...gical_optimizer_removing_redundant_checks.reference |  4 ++--
 .../02952_conjunction_optimization.reference           | 10 +++++-----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference
index b56b5166cff..649b037fafa 100644
--- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference
+++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference
@@ -44,7 +44,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality
+    TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality
   WHERE
     FUNCTION id: 4, function_name: notIn, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -62,7 +62,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality
+    TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality
   WHERE
     FUNCTION id: 4, function_name: notIn, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -106,7 +106,7 @@ QUERY id: 0
     LIST id: 1, nodes: 1
       COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality
+    TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality
   WHERE
     FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference
index 20f6784a945..cf60d63b1cf 100644
--- a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference
+++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference
@@ -97,7 +97,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02668_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02668_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: notIn, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -115,7 +115,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02668_logical_optimizer
+    TABLE id: 3, alias: __table1, table_name: default.02668_logical_optimizer
   WHERE
     FUNCTION id: 5, function_name: notEquals, function_type: ordinary, result_type: UInt8
       ARGUMENTS
diff --git a/tests/queries/0_stateless/02952_conjunction_optimization.reference b/tests/queries/0_stateless/02952_conjunction_optimization.reference
index 64663cea662..eeadfaae21d 100644
--- a/tests/queries/0_stateless/02952_conjunction_optimization.reference
+++ b/tests/queries/0_stateless/02952_conjunction_optimization.reference
@@ -9,7 +9,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02952_disjunction_optimization
+    TABLE id: 3, alias: __table1, table_name: default.02952_disjunction_optimization
   WHERE
     FUNCTION id: 5, function_name: notIn, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -27,7 +27,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02952_disjunction_optimization
+    TABLE id: 3, alias: __table1, table_name: default.02952_disjunction_optimization
   WHERE
     FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: Bool
       ARGUMENTS
@@ -48,7 +48,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02952_disjunction_optimization
+    TABLE id: 3, alias: __table1, table_name: default.02952_disjunction_optimization
   WHERE
     FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -73,7 +73,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02952_disjunction_optimization
+    TABLE id: 3, alias: __table1, table_name: default.02952_disjunction_optimization
   WHERE
     FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8
       ARGUMENTS
@@ -100,7 +100,7 @@ QUERY id: 0
       COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3
       COLUMN id: 4, column_name: b, result_type: String, source_id: 3
   JOIN TREE
-    TABLE id: 3, table_name: default.02952_disjunction_optimization
+    TABLE id: 3, alias: __table1, table_name: default.02952_disjunction_optimization
   WHERE
     FUNCTION id: 5, function_name: or, function_type: ordinary, result_type: UInt8
       ARGUMENTS

From 7d2dafb02415a387c9fe7bb191e669ac29230f8f Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:02:42 +0000
Subject: [PATCH 175/204] Update version_date.tsv and changelogs after
 v23.12.2.59-stable

---
 docker/keeper/Dockerfile              |  2 +-
 docker/server/Dockerfile.alpine       |  2 +-
 docker/server/Dockerfile.ubuntu       |  2 +-
 docs/changelogs/v23.12.2.59-stable.md | 32 +++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv  |  3 +++
 5 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.12.2.59-stable.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.12.2.59-stable.md b/docs/changelogs/v23.12.2.59-stable.md
new file mode 100644
index 00000000000..6533f4e6b86
--- /dev/null
+++ b/docs/changelogs/v23.12.2.59-stable.md
@@ -0,0 +1,32 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.12.2.59-stable (17ab210e761) FIXME as compared to v23.12.1.1368-stable (a2faa65b080)
+
+#### Backward Incompatible Change
+* Backported in [#58389](https://github.com/ClickHouse/ClickHouse/issues/58389): The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)).
+* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY:  'Revert "Refreshable materialized views (takeover)"'. [#58296](https://github.com/ClickHouse/ClickHouse/pull/58296) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Fix an error in the release script - it did not allow making 23.12. [#58288](https://github.com/ClickHouse/ClickHouse/pull/58288) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update version_date.tsv and changelogs after v23.12.1.1368-stable [#58290](https://github.com/ClickHouse/ClickHouse/pull/58290) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix test_storage_s3_queue/test.py::test_drop_table [#58293](https://github.com/ClickHouse/ClickHouse/pull/58293) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..5296a8426b0 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10

From 9d7912fa7559cf35a6ef6bf088833718dc165c6a Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:04:23 +0000
Subject: [PATCH 176/204] Update version_date.tsv and changelogs after
 v23.11.4.24-stable

---
 docker/keeper/Dockerfile              |  2 +-
 docker/server/Dockerfile.alpine       |  2 +-
 docker/server/Dockerfile.ubuntu       |  2 +-
 docs/changelogs/v23.11.4.24-stable.md | 26 ++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv  |  4 ++++
 5 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.11.4.24-stable.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.11.4.24-stable.md b/docs/changelogs/v23.11.4.24-stable.md
new file mode 100644
index 00000000000..40096285b06
--- /dev/null
+++ b/docs/changelogs/v23.11.4.24-stable.md
@@ -0,0 +1,26 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.11.4.24-stable (e79d840d7fe) FIXME as compared to v23.11.3.23-stable (a14ab450b0e)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..79a8a16314e 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08

From da76f5117314832280c87015385af38c9948d7d5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 5 Jan 2024 14:06:04 +0000
Subject: [PATCH 177/204] Fix tests.

---
 .../0_stateless/02514_analyzer_drop_join_on.reference  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
index de2e21615c8..a5a71560d00 100644
--- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
+++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
@@ -61,27 +61,27 @@ Header: a2 String
       Join (JOIN FillRightFirst)
       Header: __table1.a2 String
               __table1.k UInt64
-        Expression (DROP unused columns after JOIN)
+        Expression ((Actions for left table alias column keys + DROP unused columns after JOIN))
         Header: __table1.a2 String
                 __table1.k UInt64
           Join (JOIN FillRightFirst)
           Header: __table1.a2 String
                   __table1.k UInt64
-            Expression (Change column names to column identifiers)
+            Expression ((Actions for left table alias column keys + Change column names to column identifiers))
             Header: __table1.a2 String
                     __table1.k UInt64
               ReadFromMemoryStorage
               Header: a2 String
                       k UInt64
-            Expression (Change column names to column identifiers)
+            Expression ((Actions for right table alias column keys + Change column names to column identifiers))
             Header: __table2.k UInt64
               ReadFromMemoryStorage
               Header: k UInt64
-        Expression (Change column names to column identifiers)
+        Expression ((Actions for right table alias column keys + Change column names to column identifiers))
         Header: __table3.k UInt64
           ReadFromMemoryStorage
           Header: k UInt64
-    Expression (Change column names to column identifiers)
+    Expression ((Actions for right table alias column keys + Change column names to column identifiers))
     Header: __table4.d2 String
             __table4.k UInt64
       ReadFromMemoryStorage

From 5b9cc914db25a0ca89992f89c0a2b1d64102a6f1 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:11:11 +0000
Subject: [PATCH 178/204] Update version_date.tsv and changelogs after
 v23.8.9.54-lts

---
 docker/keeper/Dockerfile             |  2 +-
 docker/server/Dockerfile.alpine      |  2 +-
 docker/server/Dockerfile.ubuntu      |  2 +-
 docs/changelogs/v23.8.9.54-lts.md    | 47 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  5 +++
 5 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.8.9.54-lts.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.8.9.54-lts.md b/docs/changelogs/v23.8.9.54-lts.md
new file mode 100644
index 00000000000..00607c60c39
--- /dev/null
+++ b/docs/changelogs/v23.8.9.54-lts.md
@@ -0,0 +1,47 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.8.9.54-lts (192a1d231fa) FIXME as compared to v23.8.8.20-lts (5e012a03bf2)
+
+#### Improvement
+* Backported in [#57668](https://github.com/ClickHouse/ClickHouse/issues/57668): Output valid JSON/XML on exception during HTTP query execution. Add setting `http_write_exception_in_output_format` to enable/disable this behaviour (enabled by default). [#52853](https://github.com/ClickHouse/ClickHouse/pull/52853) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#58491](https://github.com/ClickHouse/ClickHouse/issues/58491): Fix transfer query to MySQL compatible query. Fixes [#57253](https://github.com/ClickHouse/ClickHouse/issues/57253). Fixes [#52654](https://github.com/ClickHouse/ClickHouse/issues/52654). Fixes [#56729](https://github.com/ClickHouse/ClickHouse/issues/56729). [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)).
+* Backported in [#57238](https://github.com/ClickHouse/ClickHouse/issues/57238): Fetching a part waits until that part is fully committed on the remote replica. It is better not to send a part in the PreActive state. In the case of zero copy this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)).
+* Backported in [#57655](https://github.com/ClickHouse/ClickHouse/issues/57655): Handle sigabrt case when getting PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#57582](https://github.com/ClickHouse/ClickHouse/issues/57582): Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)).
+* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)).
+* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* bugfix: correctly parse SYSTEM STOP LISTEN TCP SECURE [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)).
+* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities.  [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
+* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
+* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY:  'Update PeekableWriteBuffer.cpp'. [#57701](https://github.com/ClickHouse/ClickHouse/pull/57701) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..b2983033e44 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08
@@ -41,6 +45,7 @@ v23.4.4.16-stable	2023-06-17
 v23.4.3.48-stable	2023-06-12
 v23.4.2.11-stable	2023-05-02
 v23.4.1.1943-stable	2023-04-27
+v23.3.19.32-lts	2024-01-05
 v23.3.18.15-lts	2023-11-25
 v23.3.17.13-lts	2023-11-17
 v23.3.16.7-lts	2023-11-08

From 0b04c5f68bc99a1a3b0175689f66f51ce21c073a Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:11:15 +0000
Subject: [PATCH 179/204] Update version_date.tsv and changelogs after
 v23.10.6.60-stable

---
 docker/keeper/Dockerfile              |  2 +-
 docker/server/Dockerfile.alpine       |  2 +-
 docker/server/Dockerfile.ubuntu       |  2 +-
 docs/changelogs/v23.10.6.60-stable.md | 51 +++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv  |  5 +++
 5 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.10.6.60-stable.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.10.6.60-stable.md b/docs/changelogs/v23.10.6.60-stable.md
new file mode 100644
index 00000000000..5e1c126e729
--- /dev/null
+++ b/docs/changelogs/v23.10.6.60-stable.md
@@ -0,0 +1,51 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.10.6.60-stable (68907bbe643) FIXME as compared to v23.10.5.20-stable (e84001e5c61)
+
+#### Improvement
+* Backported in [#58493](https://github.com/ClickHouse/ClickHouse/issues/58493): Fix transfer query to MySQL compatible query. Fixes [#57253](https://github.com/ClickHouse/ClickHouse/issues/57253). Fixes [#52654](https://github.com/ClickHouse/ClickHouse/issues/52654). Fixes [#56729](https://github.com/ClickHouse/ClickHouse/issues/56729). [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)).
+* Backported in [#57659](https://github.com/ClickHouse/ClickHouse/issues/57659): Handle SIGABRT case when getting PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#57586](https://github.com/ClickHouse/ClickHouse/issues/57586): Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)).
+* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)).
+* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)).
+* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)).
+* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix function jsonMergePatch for partially const columns [#57379](https://github.com/ClickHouse/ClickHouse/pull/57379) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* bugfix: correctly parse SYSTEM STOP LISTEN TCP SECURE [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)).
+* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities.  [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
+* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
+* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### NO CL CATEGORY
+
+* Backported in [#57916](https://github.com/ClickHouse/ClickHouse/issues/57916):. [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..b2983033e44 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08
@@ -41,6 +45,7 @@ v23.4.4.16-stable	2023-06-17
 v23.4.3.48-stable	2023-06-12
 v23.4.2.11-stable	2023-05-02
 v23.4.1.1943-stable	2023-04-27
+v23.3.19.32-lts	2024-01-05
 v23.3.18.15-lts	2023-11-25
 v23.3.17.13-lts	2023-11-17
 v23.3.16.7-lts	2023-11-08

From 21523820ab72c90a61b562095d6a9e2a5aa726f2 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:14:00 +0000
Subject: [PATCH 180/204] Update version_date.tsv and changelogs after
 v23.3.19.32-lts

---
 docker/keeper/Dockerfile             |  2 +-
 docker/server/Dockerfile.alpine      |  2 +-
 docker/server/Dockerfile.ubuntu      |  2 +-
 docs/changelogs/v23.3.19.32-lts.md   | 36 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  5 ++++
 5 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.3.19.32-lts.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.3.19.32-lts.md b/docs/changelogs/v23.3.19.32-lts.md
new file mode 100644
index 00000000000..4604c986fe6
--- /dev/null
+++ b/docs/changelogs/v23.3.19.32-lts.md
@@ -0,0 +1,36 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.3.19.32-lts (c4d4ca8ec02) FIXME as compared to v23.3.18.15-lts (7228475d77a)
+
+#### Backward Incompatible Change
+* Backported in [#57840](https://github.com/ClickHouse/ClickHouse/issues/57840): Remove function `arrayFold` because it has a bug. This closes [#57816](https://github.com/ClickHouse/ClickHouse/issues/57816). This closes [#57458](https://github.com/ClickHouse/ClickHouse/issues/57458). [#57836](https://github.com/ClickHouse/ClickHouse/pull/57836) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Improvement
+* Backported in [#58489](https://github.com/ClickHouse/ClickHouse/issues/58489): Fix transfer query to MySQL compatible query. Fixes [#57253](https://github.com/ClickHouse/ClickHouse/issues/57253). Fixes [#52654](https://github.com/ClickHouse/ClickHouse/issues/52654). Fixes [#56729](https://github.com/ClickHouse/ClickHouse/issues/56729). [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)).
+* Backported in [#57653](https://github.com/ClickHouse/ClickHouse/issues/57653): Handle SIGABRT case when getting PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#57580](https://github.com/ClickHouse/ClickHouse/issues/57580): Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)).
+* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)).
+* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
+* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..b2983033e44 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08
@@ -41,6 +45,7 @@ v23.4.4.16-stable	2023-06-17
 v23.4.3.48-stable	2023-06-12
 v23.4.2.11-stable	2023-05-02
 v23.4.1.1943-stable	2023-04-27
+v23.3.19.32-lts	2024-01-05
 v23.3.18.15-lts	2023-11-25
 v23.3.17.13-lts	2023-11-17
 v23.3.16.7-lts	2023-11-08

From 701498f36406e48e8203ba2fad753df8d8edab36 Mon Sep 17 00:00:00 2001
From: Ladislav Snizek <ladislav.snizek@cdn77.com>
Date: Fri, 5 Jan 2024 16:08:25 +0100
Subject: [PATCH 181/204] Fix a link in the 23.12 changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 283000f1804..1e394164d8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,7 +22,7 @@
 * The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for the `OPTIMIZE` is not allowed by default (it can be unlocked with the `allow_experimental_replacing_merge_with_cleanup` setting). [#58267](https://github.com/ClickHouse/ClickHouse/pull/58267) ([Alexander Tokmakov](https://github.com/tavplubix)). This fixes [#57930](https://github.com/ClickHouse/ClickHouse/issues/57930). This closes [#54988](https://github.com/ClickHouse/ClickHouse/issues/54988). This closes [#54570](https://github.com/ClickHouse/ClickHouse/issues/54570). This closes [#50346](https://github.com/ClickHouse/ClickHouse/issues/50346). This closes [#47579](https://github.com/ClickHouse/ClickHouse/issues/47579). The feature has to be removed because it is not good. We have to remove it as quickly as possible, because there is no other option. [#57932](https://github.com/ClickHouse/ClickHouse/pull/57932) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 
 #### New Feature
-* Implement Refreshable Materialized Views, requested in [#33919](https://github.com/ClickHouse/ClickHouse/issues/57995). [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321), [Michael Guzov](https://github.com/koloshmet)).
+* Implement Refreshable Materialized Views, requested in [#33919](https://github.com/ClickHouse/ClickHouse/issues/33919). [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321), [Michael Guzov](https://github.com/koloshmet)).
 * Introduce `PASTE JOIN`, which allows users to join tables without `ON` clause simply by row numbers. Example: `SELECT * FROM (SELECT number AS a FROM numbers(2)) AS t1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY a DESC) AS t2`. [#57995](https://github.com/ClickHouse/ClickHouse/pull/57995) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
 * The `ORDER BY` clause now supports specifying `ALL`, meaning that ClickHouse sorts by all columns in the `SELECT` clause. Example: `SELECT col1, col2 FROM tab WHERE [...] ORDER BY ALL`. [#57875](https://github.com/ClickHouse/ClickHouse/pull/57875) ([zhongyuankai](https://github.com/zhongyuankai)).
 * Added a new mutation command `ALTER TABLE <table> APPLY DELETED MASK`, which allows to enforce applying of mask written by lightweight delete and to remove rows marked as deleted from disk. [#57433](https://github.com/ClickHouse/ClickHouse/pull/57433) ([Anton Popov](https://github.com/CurtizJ)).

From c6858f86665b2ddd90292163f556a347693cff8d Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 5 Jan 2024 15:22:41 +0000
Subject: [PATCH 182/204] Fixing tidy

---
 src/Storages/MergeTree/MergeTreeIndexSet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h
index 7e60e5d6dc2..ea9f7ddef3d 100644
--- a/src/Storages/MergeTree/MergeTreeIndexSet.h
+++ b/src/Storages/MergeTree/MergeTreeIndexSet.h
@@ -87,7 +87,7 @@ public:
         const String & index_name_,
         const Block & index_sample_block,
         size_t max_rows_,
-        const ActionsDAGPtr & filter_actions_dag,
+        const ActionsDAGPtr & filter_dag,
         ContextPtr context);
 
     bool alwaysUnknownOrTrue() const override;

From 149cd477988a326d4fc84a02b772b54409ee8f95 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <var1able@var1able.ru>
Date: Fri, 5 Jan 2024 18:31:49 +0100
Subject: [PATCH 183/204] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 283000f1804..0beb6f97af5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -375,6 +375,7 @@
 * Do not interpret the `send_timeout` set on the client side as the `receive_timeout` on the server side and vise-versa. [#56035](https://github.com/ClickHouse/ClickHouse/pull/56035) ([Azat Khuzhin](https://github.com/azat)).
 * Comparison of time intervals with different units will throw an exception. This closes [#55942](https://github.com/ClickHouse/ClickHouse/issues/55942). You might have occasionally rely on the previous behavior when the underlying numeric values were compared regardless of the units. [#56090](https://github.com/ClickHouse/ClickHouse/pull/56090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Rewrited the experimental `S3Queue` table engine completely: changed the way we keep information in zookeeper which allows to make less zookeeper requests, added caching of zookeeper state in cases when we know the state will not change, improved the polling from s3 process to make it less aggressive, changed the way ttl and max set for trached files is maintained, now it is a background process. Added `system.s3queue` and `system.s3queue_log` tables. Closes [#54998](https://github.com/ClickHouse/ClickHouse/issues/54998). [#54422](https://github.com/ClickHouse/ClickHouse/pull/54422) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Arbitrary parts on HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)).
 
 #### New Feature
 * Add function `arrayFold(accumulator, x1, ..., xn -> expression, initial, array1, ..., arrayn)` which applies a lambda function to multiple arrays of the same cardinality and collects the result in an accumulator. [#49794](https://github.com/ClickHouse/ClickHouse/pull/49794) ([Lirikl](https://github.com/Lirikl)).

From 33c143c21f326e6846726d74d7d145b911a39e74 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <var1able@var1able.ru>
Date: Fri, 5 Jan 2024 18:34:57 +0100
Subject: [PATCH 184/204] Typo

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0beb6f97af5..0355b21c962 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -375,7 +375,7 @@
 * Do not interpret the `send_timeout` set on the client side as the `receive_timeout` on the server side and vise-versa. [#56035](https://github.com/ClickHouse/ClickHouse/pull/56035) ([Azat Khuzhin](https://github.com/azat)).
 * Comparison of time intervals with different units will throw an exception. This closes [#55942](https://github.com/ClickHouse/ClickHouse/issues/55942). You might have occasionally rely on the previous behavior when the underlying numeric values were compared regardless of the units. [#56090](https://github.com/ClickHouse/ClickHouse/pull/56090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Rewrited the experimental `S3Queue` table engine completely: changed the way we keep information in zookeeper which allows to make less zookeeper requests, added caching of zookeeper state in cases when we know the state will not change, improved the polling from s3 process to make it less aggressive, changed the way ttl and max set for trached files is maintained, now it is a background process. Added `system.s3queue` and `system.s3queue_log` tables. Closes [#54998](https://github.com/ClickHouse/ClickHouse/issues/54998). [#54422](https://github.com/ClickHouse/ClickHouse/pull/54422) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Arbitrary parts on HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Arbitrary paths on HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)).
 
 #### New Feature
 * Add function `arrayFold(accumulator, x1, ..., xn -> expression, initial, array1, ..., arrayn)` which applies a lambda function to multiple arrays of the same cardinality and collects the result in an accumulator. [#49794](https://github.com/ClickHouse/ClickHouse/pull/49794) ([Lirikl](https://github.com/Lirikl)).

From 3e128118cf78473b3677667e4384a73457e4466c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Fri, 5 Jan 2024 18:55:18 +0100
Subject: [PATCH 185/204] Add missing file

---
 .../01825_type_json_3.gen.reference           | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 tests/queries/0_stateless/01825_type_json_3.gen.reference

diff --git a/tests/queries/0_stateless/01825_type_json_3.gen.reference b/tests/queries/0_stateless/01825_type_json_3.gen.reference
new file mode 100644
index 00000000000..7ca74f235ec
--- /dev/null
+++ b/tests/queries/0_stateless/01825_type_json_3.gen.reference
@@ -0,0 +1,59 @@
+1	('',0)	Tuple(\n    k1 String,\n    k2 Int8)
+2	('v1',2)	Tuple(\n    k1 String,\n    k2 Int8)
+1		0
+2	v1	2
+========
+1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
+1	[]	[]
+2	['v1','v4']	['v3','']
+1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
+3	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+4	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+1	[]	[]
+2	['v1','v4']	['v3','']
+3	[]	[]
+4	[]	[]
+all_2_2_0	data	Tuple(k1 Nested(k2 String, k3 String))
+all_3_3_0	data	Tuple(_dummy UInt8)
+data	Tuple(k1 Nested(k2 String, k3 String))
+1	[]	[]
+2	['v1','v4']	['v3','']
+3	[]	[]
+4	[]	[]
+========
+1	((1,'foo'),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+2	((0,''),[1,2,3])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+3	((10,''),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+1	1	foo	[]
+2	0		[1,2,3]
+3	10		[]
+1	('',0)	Tuple(\n    k1 String,\n    k2 Int8)
+2	('v1',2)	Tuple(\n    k1 String,\n    k2 Int8)
+1		0
+2	v1	2
+========
+1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
+1	[]	[]
+2	['v1','v4']	['v3','']
+1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
+3	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+4	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+1	[]	[]
+2	['v1','v4']	['v3','']
+3	[]	[]
+4	[]	[]
+1	[]	[]
+2	['v1','v4']	['v3','']
+3	[]	[]
+4	[]	[]
+========
+1	((1,'foo'),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+2	((0,''),[1,2,3])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+3	((10,''),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+1	1	foo	[]
+2	0		[1,2,3]
+3	10		[]

From d93db9efa685bcd0f3a03363d9406fe5072c4f2e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Fri, 5 Jan 2024 18:59:56 +0100
Subject: [PATCH 186/204] Fix reference

---
 .../01825_type_json_3.gen.reference           | 59 -------------------
 .../01825_type_json_3.reference.j2            | 22 +++----
 2 files changed, 11 insertions(+), 70 deletions(-)
 delete mode 100644 tests/queries/0_stateless/01825_type_json_3.gen.reference

diff --git a/tests/queries/0_stateless/01825_type_json_3.gen.reference b/tests/queries/0_stateless/01825_type_json_3.gen.reference
deleted file mode 100644
index 7ca74f235ec..00000000000
--- a/tests/queries/0_stateless/01825_type_json_3.gen.reference
+++ /dev/null
@@ -1,59 +0,0 @@
-1	('',0)	Tuple(\n    k1 String,\n    k2 Int8)
-2	('v1',2)	Tuple(\n    k1 String,\n    k2 Int8)
-1		0
-2	v1	2
-========
-1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
-1	[]	[]
-2	['v1','v4']	['v3','']
-1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
-3	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-4	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-1	[]	[]
-2	['v1','v4']	['v3','']
-3	[]	[]
-4	[]	[]
-all_2_2_0	data	Tuple(k1 Nested(k2 String, k3 String))
-all_3_3_0	data	Tuple(_dummy UInt8)
-data	Tuple(k1 Nested(k2 String, k3 String))
-1	[]	[]
-2	['v1','v4']	['v3','']
-3	[]	[]
-4	[]	[]
-========
-1	((1,'foo'),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
-2	((0,''),[1,2,3])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
-3	((10,''),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
-1	1	foo	[]
-2	0		[1,2,3]
-3	10		[]
-1	('',0)	Tuple(\n    k1 String,\n    k2 Int8)
-2	('v1',2)	Tuple(\n    k1 String,\n    k2 Int8)
-1		0
-2	v1	2
-========
-1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
-1	[]	[]
-2	['v1','v4']	['v3','']
-1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
-3	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-4	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
-1	[]	[]
-2	['v1','v4']	['v3','']
-3	[]	[]
-4	[]	[]
-1	[]	[]
-2	['v1','v4']	['v3','']
-3	[]	[]
-4	[]	[]
-========
-1	((1,'foo'),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
-2	((0,''),[1,2,3])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
-3	((10,''),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
-1	1	foo	[]
-2	0		[1,2,3]
-3	10		[]
diff --git a/tests/queries/0_stateless/01825_type_json_3.reference.j2 b/tests/queries/0_stateless/01825_type_json_3.reference.j2
index 23f38b74fd1..8646cf48872 100644
--- a/tests/queries/0_stateless/01825_type_json_3.reference.j2
+++ b/tests/queries/0_stateless/01825_type_json_3.reference.j2
@@ -1,17 +1,17 @@
 {% for engine in ["ReplicatedMergeTree('/clickhouse/tables/{database}/test_01825_3/t_json_3', 'r1') ORDER BY tuple()", "Memory"] -%}
-1	('',0)	Tuple(k1 String, k2 Int8)
-2	('v1',2)	Tuple(k1 String, k2 Int8)
+1	('',0)	Tuple(\n    k1 String,\n    k2 Int8)
+2	('v1',2)	Tuple(\n    k1 String,\n    k2 Int8)
 1		0
 2	v1	2
 ========
-1	([])	Tuple(k1 Nested(k2 String, k3 String))
-2	([('v1','v3'),('v4','')])	Tuple(k1 Nested(k2 String, k3 String))
+1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
 1	[]	[]
 2	['v1','v4']	['v3','']
-1	([])	Tuple(k1 Nested(k2 String, k3 String))
-2	([('v1','v3'),('v4','')])	Tuple(k1 Nested(k2 String, k3 String))
-3	([])	Tuple(k1 Nested(k2 String, k3 String))
-4	([])	Tuple(k1 Nested(k2 String, k3 String))
+1	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+2	([('v1','v3'),('v4','')])	Tuple(\n    k1 Nested(k2 String, k3 String))
+3	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
+4	([])	Tuple(\n    k1 Nested(k2 String, k3 String))
 1	[]	[]
 2	['v1','v4']	['v3','']
 3	[]	[]
@@ -26,9 +26,9 @@ data	Tuple(k1 Nested(k2 String, k3 String))
 3	[]	[]
 4	[]	[]
 ========
-1	((1,'foo'),[])	Tuple(k1 Tuple(k2 Int8, k3 String), k4 Array(Int8))
-2	((0,''),[1,2,3])	Tuple(k1 Tuple(k2 Int8, k3 String), k4 Array(Int8))
-3	((10,''),[])	Tuple(k1 Tuple(k2 Int8, k3 String), k4 Array(Int8))
+1	((1,'foo'),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+2	((0,''),[1,2,3])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
+3	((10,''),[])	Tuple(\n    k1 Tuple(\n        k2 Int8,\n        k3 String),\n    k4 Array(Int8))
 1	1	foo	[]
 2	0		[1,2,3]
 3	10		[]

From ce93459149e50b0a75d3aaff04a943b92be107cf Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Fri, 5 Jan 2024 19:07:10 +0100
Subject: [PATCH 187/204] Fix test 02932_kill_query_sleep.

---
 .../0_stateless/02932_kill_query_sleep.sh     | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/02932_kill_query_sleep.sh b/tests/queries/0_stateless/02932_kill_query_sleep.sh
index 08c375b875d..84e84204aa1 100755
--- a/tests/queries/0_stateless/02932_kill_query_sleep.sh
+++ b/tests/queries/0_stateless/02932_kill_query_sleep.sh
@@ -8,18 +8,31 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 function wait_query_started()
 {
     local query_id="$1"
-    $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS"
-    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.query_log WHERE query_id='$query_id' AND current_database = currentDatabase()") == 0 ]]; do
-        sleep 0.1;
-        $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS;"
+    timeout=60
+    start=$EPOCHSECONDS
+    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id'") == 0 ]]; do
+          if ((EPOCHSECONDS-start > timeout )); then
+             echo "Timeout while waiting for query $query_id to start"
+             exit 1
+          fi
+          sleep 0.1
     done
 }
 
+
 function kill_query()
 {
     local query_id="$1"
     $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id'" >/dev/null
-    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id'") != 0 ]]; do sleep 0.1; done
+    timeout=60
+    start=$EPOCHSECONDS
+    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id'") != 0 ]]; do
+          if ((EPOCHSECONDS-start > timeout )); then
+             echo "Timeout while waiting for query $query_id to cancel"
+             exit 1
+          fi
+          sleep 0.1
+    done
 }
 
 

From 7e5ba620175ab7ca9ab3cf04bf511bfe26f38eae Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 5 Jan 2024 20:33:30 +0000
Subject: [PATCH 188/204] Allow to read Bool values into String in JSON input
 formats

---
 docs/en/interfaces/formats.md                 |  1 +
 docs/en/interfaces/schema-inference.md        | 20 +++++++++++++++++++
 .../operations/settings/settings-formats.md   |  6 ++++++
 src/Core/Settings.h                           |  1 +
 src/Core/SettingsChangesHistory.h             |  1 +
 .../Serializations/SerializationString.cpp    | 16 +++++++++++++++
 src/Formats/EscapingRuleUtils.cpp             |  3 ++-
 src/Formats/FormatFactory.cpp                 |  1 +
 src/Formats/FormatSettings.h                  |  1 +
 src/Formats/SchemaInferenceUtils.cpp          | 20 +++++++++++++++++++
 src/IO/ReadHelpers.cpp                        | 10 +++++++---
 .../02961_read_bool_as_string_json.reference  | 12 +++++++++++
 .../02961_read_bool_as_string_json.sql        |  9 +++++++++
 13 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/02961_read_bool_as_string_json.reference
 create mode 100644 tests/queries/0_stateless/02961_read_bool_as_string_json.sql

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 836b1f2f637..ed67af48af7 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -1262,6 +1262,7 @@ SELECT * FROM json_each_row_nested
 
 - [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`.
 - [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
+- [input_format_json_read_bools_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_strings) - allow to parse bools as strings in JSON input formats. Default value - `true`.
 - [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `true`.
 - [input_format_json_read_arrays_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_arrays_as_strings) - allow to parse JSON arrays as strings in JSON input formats. Default value - `true`.
 - [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `true`.
diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md
index ef858796936..4db1d53987a 100644
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@@ -614,6 +614,26 @@ DESC format(JSONEachRow, $$
 └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```
 
+##### input_format_json_read_bools_as_strings
+
+Enabling this setting allows reading Bool values as strings.
+
+This setting is enabled by default.
+
+**Example:**
+
+```sql
+SET input_format_json_read_bools_as_strings = 1;
+DESC format(JSONEachRow, $$
+                                {"value" : true}
+                                {"value" : "Hello, World"}
+                         $$)
+```
+```response
+┌─name──┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ value │ Nullable(String) │              │                    │         │                  │                │
+└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
 ##### input_format_json_read_arrays_as_strings
 
 Enabling this setting allows reading JSON array values as strings.
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 3d76bd9df73..43a73844b79 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -377,6 +377,12 @@ Allow parsing bools as numbers in JSON input formats.
 
 Enabled by default.
 
+## input_format_json_read_bools_as_strings {#input_format_json_read_bools_as_strings}
+
+Allow parsing bools as strings in JSON input formats.
+
+Enabled by default.
+
 ## input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings}
 
 Allow parsing numbers as strings in JSON input formats.
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index b41e7869fae..acc47d621fb 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -983,6 +983,7 @@ class IColumn;
     M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
     M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
     M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
+    M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
     M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
     M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
     M(Bool, input_format_json_read_numbers_as_strings, true, "Allow to parse numbers as strings in JSON input formats", 0) \
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index aad57ffebb7..8f8e7b7af7a 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -81,6 +81,7 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
+    {"24.1", {{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}}},
     {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
               {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
               {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp
index 788ff429088..b2b083fd466 100644
--- a/src/DataTypes/Serializations/SerializationString.cpp
+++ b/src/DataTypes/Serializations/SerializationString.cpp
@@ -335,6 +335,22 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
     {
         read(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
     }
+    else if (settings.json.read_bools_as_strings && !istr.eof() && (*istr.position() == 't' || *istr.position() == 'f'))
+    {
+        String str_value;
+        if (*istr.position() == 't')
+        {
+            assertString("true", istr);
+            str_value = "true";
+        }
+        else if (*istr.position() == 'f')
+        {
+            assertString("false", istr);
+            str_value = "false";
+        }
+
+        read(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
+    }
     else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
     {
         String field;
diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp
index 9cc7cb3b89e..a7e9fb8e99f 100644
--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@@ -450,10 +450,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
             break;
         case FormatSettings::EscapingRule::JSON:
             result += fmt::format(
-                ", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_objects_as_strings={}, read_numbers_as_strings={}, "
+                ", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_bools_as_strings={}, read_objects_as_strings={}, read_numbers_as_strings={}, "
                 "read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}",
                 settings.json.try_infer_numbers_from_strings,
                 settings.json.read_bools_as_numbers,
+                settings.json.read_bools_as_strings,
                 settings.json.read_objects_as_strings,
                 settings.json.read_numbers_as_strings,
                 settings.json.read_arrays_as_strings,
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 15743365d7d..0344ed54ae3 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -111,6 +111,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
     format_settings.json.quote_decimals = settings.output_format_json_quote_decimals;
     format_settings.json.read_bools_as_numbers = settings.input_format_json_read_bools_as_numbers;
+    format_settings.json.read_bools_as_strings = settings.input_format_json_read_bools_as_strings;
     format_settings.json.read_numbers_as_strings = settings.input_format_json_read_numbers_as_strings;
     format_settings.json.read_objects_as_strings = settings.input_format_json_read_objects_as_strings;
     format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 8d5c044a311..5982d30f6a7 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -204,6 +204,7 @@ struct FormatSettings
         bool ignore_unknown_keys_in_named_tuple = false;
         bool serialize_as_strings = false;
         bool read_bools_as_numbers = true;
+        bool read_bools_as_strings = true;
         bool read_numbers_as_strings = true;
         bool read_objects_as_strings = true;
         bool read_arrays_as_strings = true;
diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index e2ba188d015..f065d2f0f4d 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -377,6 +377,22 @@ namespace
         type_indexes.erase(TypeIndex::UInt8);
     }
 
+    /// If we have Bool and String types convert all Bools to String.
+    /// It's applied only when setting input_format_json_read_bools_as_strings is enabled.
+    void transformJSONBoolsAndStringsToString(DataTypes & data_types, TypeIndexesSet & type_indexes)
+    {
+        if (!type_indexes.contains(TypeIndex::String) || !type_indexes.contains(TypeIndex::UInt8))
+            return;
+
+        for (auto & type : data_types)
+        {
+            if (isBool(type))
+                type = std::make_shared<DataTypeString>();
+        }
+
+        type_indexes.erase(TypeIndex::UInt8);
+    }
+
     /// If we have type Nothing/Nullable(Nothing) and some other non Nothing types,
     /// convert all Nothing/Nullable(Nothing) types to the first non Nothing.
     /// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)]
@@ -628,6 +644,10 @@ namespace
             if (settings.json.read_bools_as_numbers)
                 transformBoolsAndNumbersToNumbers(data_types, type_indexes);
 
+            /// Convert Bool to String if needed.
+            if (settings.json.read_bools_as_strings)
+                transformJSONBoolsAndStringsToString(data_types, type_indexes);
+
             if (settings.json.try_infer_objects_as_tuples)
                 mergeJSONPaths(data_types, type_indexes, settings, json_info);
         };
diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index 256354b2833..05d35a57b12 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -1382,8 +1382,12 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
     }
     else
     {
-        throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol '{}' for key '{}'",
-                        std::string(*buf.position(), 1), name_of_field.toString());
+        throw Exception(
+            ErrorCodes::INCORRECT_DATA,
+            "Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}",
+            String(buf.position(), std::min(buf.available(), size_t(10))),
+            std::string(1, *buf.position()),
+            name_of_field.empty() ? "" : " for key " + name_of_field.toString());
     }
 }
 
@@ -1753,7 +1757,7 @@ void readQuotedField(String & s, ReadBuffer & buf)
 void readJSONField(String & s, ReadBuffer & buf)
 {
     s.clear();
-    auto parse_func = [](ReadBuffer & in) { skipJSONField(in, "json_field"); };
+    auto parse_func = [](ReadBuffer & in) { skipJSONField(in, ""); };
     readParsedValueInto(s, buf, parse_func);
 }
 
diff --git a/tests/queries/0_stateless/02961_read_bool_as_string_json.reference b/tests/queries/0_stateless/02961_read_bool_as_string_json.reference
new file mode 100644
index 00000000000..56f15989a45
--- /dev/null
+++ b/tests/queries/0_stateless/02961_read_bool_as_string_json.reference
@@ -0,0 +1,12 @@
+true
+false
+str
+true
+false
+str
+['true','false']
+['false','true']
+['str1','str2']
+['true','false']
+['false','true']
+['str1','str2']
diff --git a/tests/queries/0_stateless/02961_read_bool_as_string_json.sql b/tests/queries/0_stateless/02961_read_bool_as_string_json.sql
new file mode 100644
index 00000000000..b9f4a7926f9
--- /dev/null
+++ b/tests/queries/0_stateless/02961_read_bool_as_string_json.sql
@@ -0,0 +1,9 @@
+set input_format_json_read_bools_as_strings=1;
+select * from format(JSONEachRow, 'x String', '{"x" : true}, {"x" : false}, {"x" : "str"}');
+select * from format(JSONEachRow, '{"x" : true}, {"x" : false}, {"x" : "str"}');
+select * from format(JSONEachRow, 'x String', '{"x" : tru}'); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
+select * from format(JSONEachRow, 'x String', '{"x" : fals}'); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
+select * from format(JSONEachRow, 'x String', '{"x" : atru}'); -- {serverError INCORRECT_DATA}
+select * from format(JSONEachRow, 'x Array(String)', '{"x" : [true, false]}, {"x" : [false, true]}, {"x" : ["str1", "str2"]}');
+select * from format(JSONEachRow, '{"x" : [true, false]}, {"x" : [false, true]}, {"x" : ["str1", "str2"]}');
+

From ea35637c19ab1773731c9a3c59bcfdfa758ec866 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 5 Jan 2024 21:50:58 +0100
Subject: [PATCH 189/204] Update ReadFromMergeTree.cpp

---
 src/Processors/QueryPlan/ReadFromMergeTree.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index 98afd397c0f..68786bdec6c 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -23,6 +23,8 @@
 #include <Processors/Transforms/ReverseTransform.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
+#include <Storages/MergeTree/MergeTreeIndexAnnoy.h>
+#include <Storages/MergeTree/MergeTreeIndexUSearch.h>
 #include <Storages/MergeTree/MergeTreeReadPool.h>
 #include <Storages/MergeTree/MergeTreePrefetchedReadPool.h>
 #include <Storages/MergeTree/MergeTreeReadPoolInOrder.h>
@@ -37,8 +39,6 @@
 #include <Common/JSONBuilder.h>
 #include <Common/isLocalAddress.h>
 #include <Common/logger_useful.h>
-#include <Storages/MergeTree/MergeTreeIndexAnnoy.h>
-#include <Storages/MergeTree/MergeTreeIndexUSearch.h>
 #include <Parsers/parseIdentifierOrStringLiteral.h>
 #include <Parsers/ExpressionListParsers.h>
 

From 2751b351cb4c30b80d0af2b1e906bdac1ac74d67 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 6 Jan 2024 16:44:38 +0100
Subject: [PATCH 190/204] Fix test

---
 tests/integration/test_storage_iceberg/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py
index d5f8d04e258..0a1491a7021 100644
--- a/tests/integration/test_storage_iceberg/test.py
+++ b/tests/integration/test_storage_iceberg/test.py
@@ -463,7 +463,7 @@ def test_schema_inference(started_cluster, format_version):
 
         create_iceberg_table(instance, TABLE_NAME, format)
 
-        res = instance.query(f"DESC {TABLE_NAME} FORMAT TSVRaw")
+        res = instance.query(f"DESC {TABLE_NAME} FORMAT TSVRaw", settings={"print_pretty_type_names": 0})
         expected = TSV(
             [
                 ["intC", "Nullable(Int32)"],

From 4a473c37ec89566e21bfd435893eade025260fa2 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Sat, 6 Jan 2024 15:55:20 +0000
Subject: [PATCH 191/204] Automatic style fix

---
 tests/integration/test_storage_iceberg/test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py
index 0a1491a7021..9a75dc50d61 100644
--- a/tests/integration/test_storage_iceberg/test.py
+++ b/tests/integration/test_storage_iceberg/test.py
@@ -463,7 +463,9 @@ def test_schema_inference(started_cluster, format_version):
 
         create_iceberg_table(instance, TABLE_NAME, format)
 
-        res = instance.query(f"DESC {TABLE_NAME} FORMAT TSVRaw", settings={"print_pretty_type_names": 0})
+        res = instance.query(
+            f"DESC {TABLE_NAME} FORMAT TSVRaw", settings={"print_pretty_type_names": 0}
+        )
         expected = TSV(
             [
                 ["intC", "Nullable(Int32)"],

From deda52b61124c86f0578bbbc7364920c72f6820c Mon Sep 17 00:00:00 2001
From: Max Kainov <max.kainov@clickhouse.com>
Date: Sat, 6 Jan 2024 16:23:20 +0000
Subject: [PATCH 192/204] add packager script to build digest

---
 tests/ci/ci_config.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 031ab0be8a0..895a12313da 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -72,10 +72,20 @@ class BuildConfig:
             include_paths=[
                 "./src",
                 "./contrib/*-cmake",
+                "./contrib/consistent-hashing",
+                "./contrib/murmurhash",
+                "./contrib/libfarmhash",
+                "./contrib/pdqsort",
+                "./contrib/cityhash102",
+                "./contrib/sparse-checkout",
+                "./contrib/libmetrohash",
+                "./contrib/update-submodules.sh",
+                "./contrib/CMakeLists.txt",
                 "./cmake",
                 "./base",
                 "./programs",
                 "./packages",
+                "./docker/packager/packager",
             ],
             exclude_files=[".md"],
             docker=["clickhouse/binary-builder"],

From dd2d9ff168d3aad612dcbcec71a003faf9750b03 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Sun, 7 Jan 2024 08:25:19 +0000
Subject: [PATCH 193/204] Minor fixes

---
 src/Functions/idna.cpp                         | 18 +++++++++---------
 src/Functions/punycode.cpp                     |  4 ++--
 .../aspell-ignore/en/aspell-dict.txt           |  5 ++---
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp
index 087131021c9..a73347400c6 100644
--- a/src/Functions/idna.cpp
+++ b/src/Functions/idna.cpp
@@ -30,7 +30,6 @@ namespace ErrorCodes
 /// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
 ///   and [3] https://www.unicode.org/reports/tr46/#ToUnicode
 
-
 enum class ErrorHandling
 {
     Throw,  /// Throw exception
@@ -71,7 +70,7 @@ struct IdnaEncode
                 {
                     if constexpr (error_handling == ErrorHandling::Throw)
                     {
-                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to Punycode", std::string_view(value, value_length));
+                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view);
                     }
                     else
                     {
@@ -96,6 +95,7 @@ struct IdnaEncode
     }
 };
 
+/// Translates an ASCII-encoded IDNA string back to its UTF-8 representation.
 struct IdnaDecode
 {
     /// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
@@ -113,11 +113,11 @@ struct IdnaDecode
         std::string unicode;
         for (size_t row = 0; row < rows; ++row)
         {
-            const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
-            const size_t value_length = offsets[row] - prev_offset - 1;
-            std::string_view value_view(value, value_length);
+            const char * ascii = reinterpret_cast<const char *>(&data[prev_offset]);
+            const size_t ascii_length = offsets[row] - prev_offset - 1;
+            std::string_view ascii_view(ascii, ascii_length);
 
-            unicode = ada::idna::to_unicode(value_view);
+            unicode = ada::idna::to_unicode(ascii_view);
 
             res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
             res_offsets.push_back(res_data.size());
@@ -149,7 +149,7 @@ REGISTER_FUNCTION(Idna)
 Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
         .syntax="idnaEncode(str)",
         .arguments={{"str", "Input string"}},
-        .returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
         .examples={
             {"simple",
             "SELECT idnaEncode('straße.münchen.de') AS ascii;",
@@ -166,7 +166,7 @@ Computes an ASCII representation of an Internationalized Domain Name. Throws an
 Computes a ASCII representation of an Internationalized Domain Name. Returns an empty string in case of error)",
         .syntax="punycodeEncode(str)",
         .arguments={{"str", "Input string"}},
-        .returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
+        .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
         .examples={
             {"simple",
             "SELECT idnaEncodeOrNull('München') AS ascii;",
@@ -180,7 +180,7 @@ Computes a ASCII representation of an Internationalized Domain Name. Returns an
 
     factory.registerFunction<FunctionIdnaDecode>(FunctionDocumentation{
         .description=R"(
-Computes a Unicode representation of an Internationalized Domain Name.)",
+Computes the Unicode representation of ASCII-encoded Internationalized Domain Name.)",
         .syntax="idnaDecode(str)",
         .arguments={{"str", "Input string"}},
         .returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp
index 66e5ac6cb7b..159189744bd 100644
--- a/src/Functions/punycode.cpp
+++ b/src/Functions/punycode.cpp
@@ -37,7 +37,7 @@ enum class ErrorHandling
 
 struct PunycodeEncode
 {
-    /// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, returnn undefined output, i.e. garbage-in, garbage-out.
+    /// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out.
     static void vector(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
@@ -60,7 +60,7 @@ struct PunycodeEncode
             value_utf32.resize(value_utf32_length);
             const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
             if (codepoints == 0)
-                value_utf32.clear(); /// input was empty or it is not valid UTF-8
+                value_utf32.clear(); /// input was empty or is not valid UTF-8
 
             const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
             if (!ok)
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 11fab98cf2d..dcff1c82444 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1657,7 +1657,6 @@ icudata
 idempotency
 idnaDecode
 idnaEncode
-idnaEncodeOrNull
 ifNotFinite
 ifNull
 iframe
@@ -2083,9 +2082,7 @@ pseudorandomize
 psql
 ptrs
 punycodeDecode
-punycodeDecodeOrNull
 punycodeEncode
-punycodeEncodeOrNull
 pushdown
 pwrite
 py
@@ -2532,6 +2529,8 @@ trimRight
 trunc
 tryBase
 tryDecrypt
+tryIdnaEncode
+tryPunycodeDecode
 tskv
 tsv
 tui

From e947ed710b10a0d2a5266f196a751ca9cc57b943 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Sun, 7 Jan 2024 13:03:21 +0000
Subject: [PATCH 194/204] Hijack and document 'update' map to represent reload
 status

---
 .../system-tables/server_settings.md          |  39 ++--
 src/Core/ServerSettings.cpp                   |   4 +-
 src/Core/ServerSettings.h                     | 206 ++++++++----------
 .../System/StorageSystemServerSettings.cpp    |  88 +++++---
 4 files changed, 167 insertions(+), 170 deletions(-)

diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md
index 6374b2d02a2..417c3460a53 100644
--- a/docs/en/operations/system-tables/server_settings.md
+++ b/docs/en/operations/system-tables/server_settings.md
@@ -14,12 +14,11 @@ Columns:
 - `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
 - `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
 - `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
-- `is_hot_reloadable` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) - Shows whether hot reload is supported
-- `runtime_reload` ([Enum8](../../sql-reference/data-types/enum.md)) — Hot reload type of parameter. Possible values:
-    - `Yes`
-    - `OnlyIncrease`
-    - `OnlyDecrease`
-    - `No`
+- `changeable_without_restart` ([Enum8](../../sql-reference/data-types/enum.md)) — Whether the setting can be changed at server runtime. Values:
+    - `'No'`
+    - `'IncreaseOnly'`
+    - `'DecreaseOnly'`
+    - `'Yes'`
 - `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) - Shows whether a setting is obsolete.
 
 **Example**
@@ -33,20 +32,20 @@ WHERE name LIKE '%thread_pool%'
 ```
 
 ``` text
-┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─is_hot_reloadable─┬─runtime_reload─┬─is_obsolete─┐
-│ max_thread_pool_size                        │ 10000 │ 10000   │       0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations.                           │ UInt64 │                 0 │ No             │           0 │
-│ max_thread_pool_free_size                   │ 1000  │ 1000    │       0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │                 0 │ No             │           0 │
-│ thread_pool_queue_size                      │ 10000 │ 10000   │       0 │ The maximum number of tasks that will be placed in a queue and wait for execution.                                                                  │ UInt64 │                 0 │ No             │           0 │
-│ max_io_thread_pool_size                     │ 100   │ 100     │       0 │ The maximum number of threads that would be used for IO operations                                                                                  │ UInt64 │                 0 │ Yes            │           0 │
-│ max_io_thread_pool_free_size                │ 0     │ 0       │       0 │ Max free size for IO thread pool.                                                                                                                   │ UInt64 │                 0 │ Yes            │           0 │
-│ io_thread_pool_queue_size                   │ 10000 │ 10000   │       0 │ Queue size for IO thread pool.                                                                                                                      │ UInt64 │                 0 │ Yes            │           0 │
-│ max_active_parts_loading_thread_pool_size   │ 64    │ 64      │       0 │ The number of threads to load active set of data parts (Active ones) at startup.                                                                    │ UInt64 │                 0 │ Yes            │           0 │
-│ max_outdated_parts_loading_thread_pool_size │ 32    │ 32      │       0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup.                                                                │ UInt64 │                 0 │ Yes            │           0 │
-│ max_parts_cleaning_thread_pool_size         │ 128   │ 128     │       0 │ The number of threads for concurrent removal of inactive data parts.                                                                                │ UInt64 │                 0 │ Yes            │           0 │
-│ max_backups_io_thread_pool_size             │ 1000  │ 1000    │       0 │ The maximum number of threads that would be used for IO operations for BACKUP queries                                                               │ UInt64 │                 0 │ Yes            │           0 │
-│ max_backups_io_thread_pool_free_size        │ 0     │ 0       │       0 │ Max free size for backups IO thread pool.                                                                                                           │ UInt64 │                 0 │ Yes            │           0 │
-│ backups_io_thread_pool_queue_size           │ 0     │ 0       │       0 │ Queue size for backups IO thread pool.                                                                                                              │ UInt64 │                 0 │ Yes            │           0 │
-└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴───────────────────┴────────────────┴─────────────┘
+┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐
+│ max_thread_pool_size                        │ 10000 │ 10000   │       0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations.                           │ UInt64 │                         No │           0 │
+│ max_thread_pool_free_size                   │ 1000  │ 1000    │       0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │                         No │           0 │
+│ thread_pool_queue_size                      │ 10000 │ 10000   │       0 │ The maximum number of tasks that will be placed in a queue and wait for execution.                                                                  │ UInt64 │                         No │           0 │
+│ max_io_thread_pool_size                     │ 100   │ 100     │       0 │ The maximum number of threads that would be used for IO operations                                                                                  │ UInt64 │                         No │           0 │
+│ max_io_thread_pool_free_size                │ 0     │ 0       │       0 │ Max free size for IO thread pool.                                                                                                                   │ UInt64 │                         No │           0 │
+│ io_thread_pool_queue_size                   │ 10000 │ 10000   │       0 │ Queue size for IO thread pool.                                                                                                                      │ UInt64 │                         No │           0 │
+│ max_active_parts_loading_thread_pool_size   │ 64    │ 64      │       0 │ The number of threads to load active set of data parts (Active ones) at startup.                                                                    │ UInt64 │                         No │           0 │
+│ max_outdated_parts_loading_thread_pool_size │ 32    │ 32      │       0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup.                                                                │ UInt64 │                         No │           0 │
+│ max_parts_cleaning_thread_pool_size         │ 128   │ 128     │       0 │ The number of threads for concurrent removal of inactive data parts.                                                                                │ UInt64 │                         No │           0 │
+│ max_backups_io_thread_pool_size             │ 1000  │ 1000    │       0 │ The maximum number of threads that would be used for IO operations for BACKUP queries                                                               │ UInt64 │                         No │           0 │
+│ max_backups_io_thread_pool_free_size        │ 0     │ 0       │       0 │ Max free size for backups IO thread pool.                                                                                                           │ UInt64 │                         No │           0 │
+│ backups_io_thread_pool_queue_size           │ 0     │ 0       │       0 │ Queue size for backups IO thread pool.                                                                                                              │ UInt64 │                         No │           0 │
+└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘
 
 ```
 
diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp
index 7f48226c213..fbf86d3e9ad 100644
--- a/src/Core/ServerSettings.cpp
+++ b/src/Core/ServerSettings.cpp
@@ -4,12 +4,10 @@
 namespace DB
 {
 
-IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WRAP)
+IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
 
 void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config)
 {
-
-    SERVER_SETTINGS(SET_RUNTIME_RELOAD, "", "")
     // settings which can be loaded from the the default profile, see also MAKE_DEPRECATED_BY_SERVER_CONFIG in src/Core/Settings.h
     std::unordered_set<std::string> settings_from_profile_allowlist = {
         "background_pool_size",
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 3215a35b483..03a51c1f065 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -9,132 +9,114 @@ namespace Poco::Util
 {
 class AbstractConfiguration;
 }
+
 namespace DB
 {
 
-enum class RuntimeReloadType
-{
-    Yes,
-    OnlyIncrease,
-    OnlyDecrease,
-    No,
-};
-
-#define SET_RUNTIME_RELOAD(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) \
-    this->runtime_reload_map.insert(std::make_pair(#NAME, RUNTIME_RELOAD));
-
-#define M_WRAP(M, TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS, RUNTIME_RELOAD) \
-    M(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
-
-
-#define SERVER_SETTINGS(MW, M, ALIAS) \
-    MW(M, Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0, RuntimeReloadType::No) \
-    MW(M, Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0, RuntimeReloadType::No) \
-    MW(M, Int32, max_connections, 1024, "Max server connections.", 0, RuntimeReloadType::No) \
-    MW(M, UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0, RuntimeReloadType::No) \
-    MW(M, UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0, RuntimeReloadType::No) \
-    MW(M, String, default_database, "default", "Default database name.", 0, RuntimeReloadType::No) \
-    MW(M, String, tmp_policy, "", "Policy for storage with temporary data.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0, RuntimeReloadType::No) \
-    MW(M, String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
-    MW(M, Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
-    MW(M, Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0, RuntimeReloadType::Yes) \
-    MW(M, Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0, RuntimeReloadType::No) \
-    MW(M, Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0, RuntimeReloadType::No) \
+#define SERVER_SETTINGS(M, ALIAS) \
+    M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
+    M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
+    M(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \
+    M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
+    M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
+    M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \
+    M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
+    M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
+    M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
+    M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
+    M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
+    M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
+    M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
+    M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
+    M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
+    M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
+    M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
+    M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
+    M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
+    M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
+    M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
+    M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
+    M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
+    M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
+    M(Int32, max_connections, 1024, "Max server connections.", 0) \
+    M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
+    M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
+    M(String, default_database, "default", "Default database name.", 0) \
+    M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
+    M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \
+    M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
+    M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
+    M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \
+    M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \
+    M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \
+    M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \
+    M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
+    M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
+    M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
     \
-    MW(M, UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0, RuntimeReloadType::Yes) \
+    M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \
+    M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \
+    M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \
     \
-    MW(M, Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0, RuntimeReloadType::No) \
-    MW(M, String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0, RuntimeReloadType::No) \
-    MW(M, Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
-    MW(M, String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0, RuntimeReloadType::No) \
-    MW(M, Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
-    MW(M, String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0, RuntimeReloadType::No) \
-    MW(M, Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
-    MW(M, String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0, RuntimeReloadType::No) \
-    MW(M, Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0, RuntimeReloadType::No) \
+    M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
+    M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
+    M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
+    M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
+    M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
+    M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
+    M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
+    M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \
+    M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \
+    M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \
+    M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \
+    M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \
+    M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \
+    M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
     \
-    MW(M, Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0, RuntimeReloadType::No) \
-    MW(M, Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0, RuntimeReloadType::No) \
-    MW(M, UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0, RuntimeReloadType::No) \
+    M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
+    M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
+    M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \
     \
-    MW(M, UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0, RuntimeReloadType::OnlyDecrease) \
-    MW(M, UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0, RuntimeReloadType::OnlyDecrease) \
+    M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
+    M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
+    M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \
+    M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \
+    M(UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \
+    M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
+    M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
     \
-    MW(M, UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0, RuntimeReloadType::OnlyIncrease) \
-    MW(M, UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0, RuntimeReloadType::Yes) \
-    MW(M, UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0, RuntimeReloadType::Yes) \
-    MW(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0, RuntimeReloadType::No) \
-    MW(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::No) \
+    M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \
+    M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \
+    M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
+    M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
+    M(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
+    M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
+    M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
+    M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
+    M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
+    M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
+    M(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
+    M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
+    M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
+    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
     \
-    MW(M, Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0, RuntimeReloadType::No) \
-    MW(M, Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0, RuntimeReloadType::No) \
-    MW(M, Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0, RuntimeReloadType::No) \
-    MW(M, Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::No) \
-    MW(M, Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::No) \
-    MW(M, UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::No) \
-    MW(M, Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0, RuntimeReloadType::No) \
-    MW(M, Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0, RuntimeReloadType::No) \
+    M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
+    M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \
+    M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \
+    M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \
+    M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
+    M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
+    M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
+    M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
+    M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
+    M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
 
+    /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp
 
-#define SERVER_SETTINGS_WRAP(M, ALIAS) \
-    SERVER_SETTINGS(M_WRAP, M, ALIAS)
-
-DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS_WRAP)
-
+DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
 
 struct ServerSettings : public BaseSettings<ServerSettingsTraits>
 {
-    std::unordered_map<std::string, RuntimeReloadType> runtime_reload_map;
     void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
 };
 
diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp
index bcfca59618a..7c42c610c22 100644
--- a/src/Storages/System/StorageSystemServerSettings.cpp
+++ b/src/Storages/System/StorageSystemServerSettings.cpp
@@ -1,12 +1,15 @@
-#include <Storages/System/StorageSystemServerSettings.h>
 #include <Core/BackgroundSchedulePool.h>
-#include <DataTypes/DataTypeString.h>
+#include <Core/ServerSettings.h>
 #include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
+#include <IO/MMappedFileCache.h>
+#include <IO/UncompressedCache.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/ProcessList.h>
-#include <Core/ServerSettings.h>
+#include <Storages/MarkCache.h>
 #include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>
+#include <Storages/System/StorageSystemServerSettings.h>
 
 
 namespace CurrentMetrics
@@ -20,18 +23,25 @@ namespace CurrentMetrics
 namespace DB
 {
 
-static std::vector<std::pair<String, Int8>> getRuntimeReloadEnumAndValues()
+enum class ChangeableWithoutRestart
 {
-    return std::vector<std::pair<String, Int8>>{
-        {"Yes",            static_cast<Int8>(RuntimeReloadType::Yes)},
-        {"OnlyIncrease",    static_cast<Int8>(RuntimeReloadType::OnlyIncrease)},
-        {"OnlyDecrease",    static_cast<Int8>(RuntimeReloadType::OnlyDecrease)},
-        {"No",              static_cast<Int8>(RuntimeReloadType::No)},
-    };
-}
+    No,
+    IncreaseOnly,
+    DecreaseOnly,
+    Yes
+};
 
 NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
 {
+    auto changeable_without_restart_type = std::make_shared<DataTypeEnum8>(
+        DataTypeEnum8::Values
+        {
+            {"No",              static_cast<Int8>(ChangeableWithoutRestart::No)},
+            {"IncreaseOnly",    static_cast<Int8>(ChangeableWithoutRestart::IncreaseOnly)},
+            {"DecreaseOnly",    static_cast<Int8>(ChangeableWithoutRestart::DecreaseOnly)},
+            {"Yes",             static_cast<Int8>(ChangeableWithoutRestart::Yes)},
+        });
+
     return {
         {"name", std::make_shared<DataTypeString>()},
         {"value", std::make_shared<DataTypeString>()},
@@ -39,38 +49,45 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
         {"changed", std::make_shared<DataTypeUInt8>()},
         {"description", std::make_shared<DataTypeString>()},
         {"type", std::make_shared<DataTypeString>()},
-        {"is_hot_reloadable", std::make_shared<DataTypeUInt8>()},
-        {"runtime_reload", std::make_shared<DataTypeEnum8>(getRuntimeReloadEnumAndValues())},
+        {"changeable_without_restart", std::move(changeable_without_restart_type)},
         {"is_obsolete", std::make_shared<DataTypeUInt8>()}
     };
 }
 
 void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const
 {
-    // Server settings that have been reloaded from the config file.
-    std::unordered_map<std::string, std::string> updated = {
-        {"max_server_memory_usage", std::to_string(total_memory_tracker.getHardLimit())},
-        {"allow_use_jemalloc_memory", std::to_string(total_memory_tracker.getAllowUseJemallocMmemory())},
+    /// When the server configuration file is periodically re-loaded from disk, the server components (e.g. memory tracking) are updated
+    /// with the new setting values but the settings themselves are not stored between re-loads. As a result, if one wants to know the
+    /// current setting values, one needs to ask the components directly.
+    std::unordered_map<String, std::pair<String, ChangeableWithoutRestart>> changeable_settings = {
+        {"max_server_memory_usage", {std::to_string(total_memory_tracker.getHardLimit()), ChangeableWithoutRestart::Yes}},
+        {"allow_use_jemalloc_memory", {std::to_string(total_memory_tracker.getAllowUseJemallocMmemory()), ChangeableWithoutRestart::Yes}},
 
-        {"max_table_size_to_drop", std::to_string(context->getMaxTableSizeToDrop())},
-        {"max_partition_size_to_drop", std::to_string(context->getMaxPartitionSizeToDrop())},
+        {"max_table_size_to_drop", {std::to_string(context->getMaxTableSizeToDrop()), ChangeableWithoutRestart::Yes}},
+        {"max_partition_size_to_drop", {std::to_string(context->getMaxPartitionSizeToDrop()), ChangeableWithoutRestart::Yes}},
 
-        {"max_concurrent_queries", std::to_string(context->getProcessList().getMaxSize())},
-        {"max_concurrent_insert_queries", std::to_string(context->getProcessList().getMaxInsertQueriesAmount())},
-        {"max_concurrent_select_queries", std::to_string(context->getProcessList().getMaxSelectQueriesAmount())},
+        {"max_concurrent_queries", {std::to_string(context->getProcessList().getMaxSize()), ChangeableWithoutRestart::Yes}},
+        {"max_concurrent_insert_queries", {std::to_string(context->getProcessList().getMaxInsertQueriesAmount()), ChangeableWithoutRestart::Yes}},
+        {"max_concurrent_select_queries", {std::to_string(context->getProcessList().getMaxSelectQueriesAmount()), ChangeableWithoutRestart::Yes}},
 
-        {"background_buffer_flush_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize))},
-        {"background_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize))},
-        {"background_message_broker_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize))},
-        {"background_distributed_schedule_pool_size", std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundDistributedSchedulePoolSize))}
+        {"background_buffer_flush_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize)), ChangeableWithoutRestart::IncreaseOnly}},
+        {"background_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize)), ChangeableWithoutRestart::IncreaseOnly}},
+        {"background_message_broker_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize)), ChangeableWithoutRestart::IncreaseOnly}},
+        {"background_distributed_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundDistributedSchedulePoolSize)), ChangeableWithoutRestart::IncreaseOnly}},
+
+        {"mark_cache_size", {std::to_string(context->getMarkCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
+        {"uncompressed_cache_size", {std::to_string(context->getUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
+        {"index_mark_cache_size", {std::to_string(context->getIndexMarkCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
+        {"index_uncompressed_cache_size", {std::to_string(context->getIndexUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
+        {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}
     };
 
     if (context->areBackgroundExecutorsInitialized())
     {
-        updated.insert({"background_pool_size", std::to_string(context->getMergeMutateExecutor()->getMaxThreads())});
-        updated.insert({"background_move_pool_size", std::to_string(context->getMovesExecutor()->getMaxThreads())});
-        updated.insert({"background_fetches_pool_size", std::to_string(context->getFetchesExecutor()->getMaxThreads())});
-        updated.insert({"background_common_pool_size", std::to_string(context->getCommonExecutor()->getMaxThreads())});
+        changeable_settings.insert({"background_pool_size", {std::to_string(context->getMergeMutateExecutor()->getMaxThreads()), ChangeableWithoutRestart::IncreaseOnly}});
+        changeable_settings.insert({"background_move_pool_size", {std::to_string(context->getMovesExecutor()->getMaxThreads()), ChangeableWithoutRestart::IncreaseOnly}});
+        changeable_settings.insert({"background_fetches_pool_size", {std::to_string(context->getFetchesExecutor()->getMaxThreads()), ChangeableWithoutRestart::IncreaseOnly}});
+        changeable_settings.insert({"background_common_pool_size", {std::to_string(context->getCommonExecutor()->getMaxThreads()), ChangeableWithoutRestart::IncreaseOnly}});
     }
 
     const auto & config = context->getConfigRef();
@@ -80,17 +97,18 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
     for (const auto & setting : settings.all())
     {
         const auto & setting_name = setting.getName();
-        const auto & it = updated.find(setting_name);
+
+        const auto & changeable_settings_it = changeable_settings.find(setting_name);
+        const bool is_changeable = (changeable_settings_it != changeable_settings.end());
 
         res_columns[0]->insert(setting_name);
-        res_columns[1]->insert((it != updated.end()) ? it->second: setting.getValueString());
+        res_columns[1]->insert(is_changeable ? changeable_settings_it->second.first : setting.getValueString());
         res_columns[2]->insert(setting.getDefaultValueString());
         res_columns[3]->insert(setting.isValueChanged());
         res_columns[4]->insert(setting.getDescription());
         res_columns[5]->insert(setting.getTypeName());
-        res_columns[6]->insert((it != updated.end()) ? true : false);
-        res_columns[7]->insert(static_cast<Int8>(settings.runtime_reload_map.contains(setting_name) ? settings.runtime_reload_map.at(setting_name): RuntimeReloadType::No));
-        res_columns[8]->insert(setting.isObsolete());
+        res_columns[6]->insert(is_changeable ? changeable_settings_it->second.second : ChangeableWithoutRestart::No);
+        res_columns[7]->insert(setting.isObsolete());
     }
 }
 

From 8cd85520861607cd8bdb07856cba81f38ea33ff9 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Sun, 7 Jan 2024 14:13:49 +0000
Subject: [PATCH 195/204] Empty-Commit


From 371d6646677adc522a67b8293c0dec0dd035df93 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Sun, 7 Jan 2024 17:27:22 +0100
Subject: [PATCH 196/204] Update MergeTreeData.cpp

---
 src/Storages/MergeTree/MergeTreeData.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index e6b0c581f27..4ad6c564a18 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -4006,9 +4006,13 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW
         /// We don't need to commit it to zk, and don't even need to activate it.
 
         MergeTreePartInfo empty_info = drop_range;
-        empty_info.min_block = empty_info.level = empty_info.mutation = 0;
+        empty_info.level = empty_info.mutation = 0;
+        empty_info.min_block = MergeTreePartInfo::MAX_BLOCK_NUMBER;
         for (const auto & part : parts_to_remove)
         {
+            /// We still have to take min_block into account to avoid creating multiple covering ranges
+            /// that intersect each other
+            empty_info.min_block = std::min(empty_info.min_block, part->info.min_block);
             empty_info.level = std::max(empty_info.level, part->info.level);
             empty_info.mutation = std::max(empty_info.mutation, part->info.mutation);
         }

From 2c61015578e892d5a839c11d426eb8fa8944ccc1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 7 Jan 2024 23:06:36 +0100
Subject: [PATCH 197/204] Do not load database engines in suggest

---
 src/Client/Suggest.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp
index 836c03d81ff..eb98c3a5740 100644
--- a/src/Client/Suggest.cpp
+++ b/src/Client/Suggest.cpp
@@ -77,7 +77,6 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti
     };
 
     add_column("name", "functions", false, {});
-    add_column("name", "database_engines", false, {});
     add_column("name", "table_engines", false, {});
     add_column("name", "formats", false, {});
     add_column("name", "table_functions", false, {});

From eb8217efb07dbf6306fd67b43028e9b89a52af99 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 8 Jan 2024 01:22:11 +0100
Subject: [PATCH 198/204] Fix wrong message in Keeper

---
 programs/keeper/Keeper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 48d26233d94..109884ec899 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -335,7 +335,7 @@ try
     else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"))
     {
         throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
-                        "By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly",
+                        "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly",
                         KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"});
     }
     else

From 149de9aa9378571eff9569cb90c9bcfd42709316 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Mon, 8 Jan 2024 03:40:48 +0000
Subject: [PATCH 199/204] discard fs::canonical return

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Storages/StorageFile.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index b3c686c290d..f44f25695f8 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -115,10 +115,10 @@ void listFilesWithRegexpMatchingImpl(
     {
         try
         {
-            /// We use fs::canonical to check if the file exists but the result path
-            /// will be fs::absolute.
+            /// We use fs::canonical to resolve the canonical path and check if the file exists
+            /// but the result path will be fs::absolute.
             /// Otherwise it will not allow to work with symlinks in `user_files_path` directory.
-            fs::path canonical_path = fs::canonical(path_for_ls + for_match);
+            fs::canonical(path_for_ls + for_match);
             fs::path absolute_path = fs::absolute(path_for_ls + for_match);
             result.push_back(absolute_path.string());
         }

From f38510be868f1966587cd375f1ff1ee01a98b0bb Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev" <felixoid@clickhouse.com>
Date: Sat, 30 Dec 2023 20:26:24 +0100
Subject: [PATCH 200/204] Do not fail the CI on an expired token

---
 tests/ci/get_robot_token.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py
index 26318b55d15..3781cdc5cc8 100644
--- a/tests/ci/get_robot_token.py
+++ b/tests/ci/get_robot_token.py
@@ -6,6 +6,7 @@ from typing import Any, Dict, List, Optional, Union
 import boto3  # type: ignore
 from github import Github
 from github.AuthenticatedUser import AuthenticatedUser
+from github.GithubException import BadCredentialsException
 from github.NamedUser import NamedUser
 
 
@@ -68,12 +69,20 @@ def get_best_robot_token(tokens_path: str = "/github-tokens") -> str:
     }
     assert tokens
 
-    for value in tokens.values():
+    for name, value in tokens.items():
         gh = Github(value, per_page=100)
-        # Do not spend additional request to API by accessin user.login unless
-        # the token is chosen by the remaining requests number
-        user = gh.get_user()
-        rest, _ = gh.rate_limiting
+        try:
+            # Do not spend an additional API request by accessing user.login unless
+            # the token is chosen by the remaining requests number
+            user = gh.get_user()
+            rest, _ = gh.rate_limiting
+        except BadCredentialsException:
+            logging.error(
+                "The token %(name)s has expired, please update it\n"
+                "::error::Token %(name)s has expired, it must be updated",
+                {"name": name},
+            )
+            continue
         logging.info("Get token with %s remaining requests", rest)
         if ROBOT_TOKEN is None:
             ROBOT_TOKEN = Token(user, value, rest)

From 77b349b136d2955e9af1a0e247f666e038c8fb5b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Mon, 8 Jan 2024 13:25:28 +0100
Subject: [PATCH 201/204] Update analyzer_tech_debt.txt

---
 tests/analyzer_tech_debt.txt | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index 735094df78b..76d06ebe837 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -6,7 +6,6 @@
 01155_rename_move_materialized_view
 01214_test_storage_merge_aliases_with_where
 01244_optimize_distributed_group_by_sharding_key
-01268_mv_scalars
 01268_shard_avgweighted
 01495_subqueries_in_with_statement
 01560_merge_distributed_join
@@ -21,21 +20,18 @@
 01925_test_storage_merge_aliases
 01952_optimize_distributed_group_by_sharding_key
 02174_cte_scalar_cache_mv
-02352_grouby_shadows_arg
 02354_annoy
 02428_parameterized_view
-02479_race_condition_between_insert_and_droppin_mv
 02493_inconsistent_hex_and_binary_number
 02575_merge_prewhere_different_default_kind
 00917_multiple_joins_denny_crane
-00636_partition_key_parts_pruning
-02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET
 02404_memory_bound_merging
 02725_agg_projection_resprect_PK
 02763_row_policy_storage_merge_alias
 02784_parallel_replicas_automatic_decision_join
 02818_parameterized_view_with_cte_multiple_usage
-02815_range_dict_no_direct_join
 # Flaky. Please don't delete them without fixing them:
+02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET
+02479_race_condition_between_insert_and_droppin_mv
 01600_parts_states_metrics_long
 01287_max_execution_speed

From 7f84b7699326a5614447992f48929b7e0613e023 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Mon, 8 Jan 2024 14:26:18 +0100
Subject: [PATCH 202/204] Update analyzer_tech_debt.txt

---
 tests/analyzer_tech_debt.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index 76d06ebe837..e8799a1a96a 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -33,5 +33,3 @@
 # Flaky. Please don't delete them without fixing them:
 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET
 02479_race_condition_between_insert_and_droppin_mv
-01600_parts_states_metrics_long
-01287_max_execution_speed

From 2e7a0bfd1c602e7cbb05b8c9ad0bb522ae9636d2 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Mon, 8 Jan 2024 14:27:05 +0100
Subject: [PATCH 203/204] Update analyzer_tech_debt.txt

---
 tests/analyzer_tech_debt.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index e8799a1a96a..b03bed2d196 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -25,11 +25,11 @@
 02493_inconsistent_hex_and_binary_number
 02575_merge_prewhere_different_default_kind
 00917_multiple_joins_denny_crane
-02404_memory_bound_merging
 02725_agg_projection_resprect_PK
 02763_row_policy_storage_merge_alias
 02784_parallel_replicas_automatic_decision_join
 02818_parameterized_view_with_cte_multiple_usage
 # Flaky. Please don't delete them without fixing them:
 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET
+02404_memory_bound_merging
 02479_race_condition_between_insert_and_droppin_mv

From 7abb850757f08dbd6ad9e6f00191116e13b0f565 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Mon, 8 Jan 2024 14:46:22 +0100
Subject: [PATCH 204/204] Update analyzer_tech_debt.txt

---
 tests/analyzer_tech_debt.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index b03bed2d196..4643d109c3d 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -30,6 +30,7 @@
 02784_parallel_replicas_automatic_decision_join
 02818_parameterized_view_with_cte_multiple_usage
 # Flaky. Please don't delete them without fixing them:
+01287_max_execution_speed
 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET
 02404_memory_bound_merging
 02479_race_condition_between_insert_and_droppin_mv