From 26c9042ea0f0529f464435cbeef111f3e6d396a5 Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Mon, 12 Jun 2023 17:06:52 +0000
Subject: [PATCH 001/884] Analyzer: support aliases in StorageMerge

---
 src/Analyzer/IQueryTreePass.h | 2 +-
 ...egateFunctionsArithmericOperationsPass.cpp | 2 +-
 ...gregateFunctionsArithmericOperationsPass.h | 2 +-
 src/Analyzer/Passes/ArrayExistsToHasPass.cpp | 2 +-
 src/Analyzer/Passes/ArrayExistsToHasPass.h | 2 +-
 src/Analyzer/Passes/AutoFinalOnQueryPass.cpp | 2 +-
 src/Analyzer/Passes/AutoFinalOnQueryPass.h | 2 +-
 .../Passes/ComparisonTupleEliminationPass.cpp | 2 +-
 .../Passes/ComparisonTupleEliminationPass.h | 2 +-
 .../Passes/ConvertOrLikeChainPass.cpp | 2 +-
 src/Analyzer/Passes/ConvertOrLikeChainPass.h | 2 +-
 src/Analyzer/Passes/ConvertQueryToCNFPass.cpp | 2 +-
 src/Analyzer/Passes/ConvertQueryToCNFPass.h | 2 +-
 src/Analyzer/Passes/CountDistinctPass.cpp | 2 +-
 src/Analyzer/Passes/CountDistinctPass.h | 2 +-
 src/Analyzer/Passes/CrossToInnerJoinPass.cpp | 2 +-
 src/Analyzer/Passes/CrossToInnerJoinPass.h | 2 +-
 .../Passes/FunctionToSubcolumnsPass.cpp | 2 +-
 .../Passes/FunctionToSubcolumnsPass.h | 2 +-
 src/Analyzer/Passes/FuseFunctionsPass.cpp | 2 +-
 src/Analyzer/Passes/FuseFunctionsPass.h | 2 +-
 .../Passes/GroupingFunctionsResolvePass.cpp | 2 +-
 .../Passes/GroupingFunctionsResolvePass.h | 2 +-
 src/Analyzer/Passes/IfChainToMultiIfPass.cpp | 2 +-
 src/Analyzer/Passes/IfChainToMultiIfPass.h | 2 +-
 .../Passes/IfConstantConditionPass.cpp | 2 +-
 src/Analyzer/Passes/IfConstantConditionPass.h | 2 +-
 .../Passes/IfTransformStringsToEnumPass.cpp | 2 +-
 .../Passes/IfTransformStringsToEnumPass.h | 2 +-
 .../Passes/LogicalExpressionOptimizerPass.cpp | 2 +-
 .../Passes/LogicalExpressionOptimizerPass.h | 2 +-
 src/Analyzer/Passes/MultiIfToIfPass.cpp | 2 +-
 src/Analyzer/Passes/MultiIfToIfPass.h | 2 +-
 .../Passes/NormalizeCountVariantsPass.cpp | 2 +-
 .../Passes/NormalizeCountVariantsPass.h | 2 +-
 .../OptimizeGroupByFunctionKeysPass.cpp | 2 +-
 .../Passes/OptimizeGroupByFunctionKeysPass.h | 2 +-
 ...ptimizeRedundantFunctionsInOrderByPass.cpp | 2 +-
 .../OptimizeRedundantFunctionsInOrderByPass.h | 2 +-
 ...OrderByLimitByDuplicateEliminationPass.cpp | 2 +-
 .../OrderByLimitByDuplicateEliminationPass.h | 2 +-
 .../Passes/OrderByTupleEliminationPass.cpp | 2 +-
 .../Passes/OrderByTupleEliminationPass.h | 2 +-
 src/Analyzer/Passes/QueryAnalysisPass.cpp | 15 ++-
 src/Analyzer/Passes/QueryAnalysisPass.h | 2 +-
 .../RewriteAggregateFunctionWithIfPass.cpp | 2 +-
 .../RewriteAggregateFunctionWithIfPass.h | 2 +-
 .../Passes/ShardNumColumnToFunctionPass.cpp | 2 +-
 .../Passes/ShardNumColumnToFunctionPass.h | 2 +-
 src/Analyzer/Passes/SumIfToCountIfPass.cpp | 2 +-
 src/Analyzer/Passes/SumIfToCountIfPass.h | 2 +-
 .../UniqInjectiveFunctionsEliminationPass.cpp | 2 +-
 .../UniqInjectiveFunctionsEliminationPass.h | 2 +-
 src/Planner/PlannerActionsVisitor.cpp | 4 +-
 src/Storages/StorageDistributed.cpp | 4 +-
 src/Storages/StorageMerge.cpp | 121 +++++++++++++++---
 src/Storages/StorageMerge.h | 9 +-
 57 files changed, 177 insertions(+), 80 deletions(-)

diff --git a/src/Analyzer/IQueryTreePass.h b/src/Analyzer/IQueryTreePass.h
index 4293934c32d..d4499c3271c 100644
--- a/src/Analyzer/IQueryTreePass.h
+++ b/src/Analyzer/IQueryTreePass.h
@@ -31,7 +31,7 @@ public:
     virtual String getDescription() = 0;
 
     /// Run pass over query tree
-    virtual void run(QueryTreeNodePtr query_tree_node, ContextPtr context) = 0;
+    virtual void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) = 0;
 
 };
 
diff --git
a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 1476a66c892..2a69292ff78 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -201,7 +201,7 @@ private: } -void AggregateFunctionsArithmericOperationsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void AggregateFunctionsArithmericOperationsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { AggregateFunctionsArithmericOperationsVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h index a89d2f87ad9..d510b62f9be 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h @@ -17,7 +17,7 @@ public: String getDescription() override { return "Extract arithmeric operations from aggregate functions."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp index c0f958588f1..63d417cd570 100644 --- a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp @@ -92,7 +92,7 @@ public: } -void RewriteArrayExistsToHasPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void RewriteArrayExistsToHasPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { RewriteArrayExistsToHasVisitor visitor(context); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.h b/src/Analyzer/Passes/ArrayExistsToHasPass.h index 8f4623116e3..4795b61c625 100644 --- a/src/Analyzer/Passes/ArrayExistsToHasPass.h +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.h @@ -20,7 +20,7 @@ public: String getDescription() override { return "Rewrite arrayExists(func, arr) functions to has(arr, elem) when logically equivalent"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index 15326ca1dc8..ee9e1023949 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -67,7 +67,7 @@ private: } -void AutoFinalOnQueryPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void AutoFinalOnQueryPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { auto visitor = AutoFinalOnQueryPassVisitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.h b/src/Analyzer/Passes/AutoFinalOnQueryPass.h index 3489597108c..d595b98d349 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.h +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.h @@ -25,7 +25,7 @@ public: return "Automatically applies final modifier to table expressions in queries if it is supported and if user level final setting is set"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git 
a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 4e0562a2fe8..57920065513 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -201,7 +201,7 @@ private: } -void ComparisonTupleEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void ComparisonTupleEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { ComparisonTupleEliminationPassVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.h b/src/Analyzer/Passes/ComparisonTupleEliminationPass.h index 954a9d6a2f0..7f4245e2d95 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.h +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.h @@ -17,7 +17,7 @@ public: String getDescription() override { return "Rewrite tuples comparison into equivalent comparison of tuples arguments"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 7d7362fb742..0d2ddd20374 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -132,7 +132,7 @@ private: } -void ConvertOrLikeChainPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void ConvertOrLikeChainPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { auto or_function_resolver = FunctionFactory::instance().get("or", context); auto match_function_resolver = FunctionFactory::instance().get("multiMatchAny", context); diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.h b/src/Analyzer/Passes/ConvertOrLikeChainPass.h index 0f734bfa73d..90bccaa0e8d 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.h +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.h @@ -14,7 +14,7 @@ public: String getDescription() override { return "Replaces all the 'or's with {i}like to multiMatchAny"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 4d32c96b845..ecba2e28749 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -720,7 +720,7 @@ public: } -void ConvertLogicalExpressionToCNFPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void ConvertLogicalExpressionToCNFPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { const auto & settings = context->getSettingsRef(); if (!settings.convert_query_to_cnf) diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.h b/src/Analyzer/Passes/ConvertQueryToCNFPass.h index 5ed874db006..60943c04d78 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.h +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.h @@ -12,7 +12,7 @@ public: String getDescription() override { return "Convert logical expression to CNF and apply optimizations using constraints"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp 
index 945295f5cbc..eb2859020be 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -84,7 +84,7 @@ public: } -void CountDistinctPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void CountDistinctPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { CountDistinctVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/CountDistinctPass.h b/src/Analyzer/Passes/CountDistinctPass.h index cac5033c98f..33728b0228c 100644 --- a/src/Analyzer/Passes/CountDistinctPass.h +++ b/src/Analyzer/Passes/CountDistinctPass.h @@ -20,7 +20,7 @@ public: return "Optimize single countDistinct into count over subquery"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index d4877d23f28..3283c163890 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -264,7 +264,7 @@ private: } -void CrossToInnerJoinPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void CrossToInnerJoinPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { CrossToInnerJoinVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.h b/src/Analyzer/Passes/CrossToInnerJoinPass.h index 127d26dc41d..b0437c562ac 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.h +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.h @@ -22,7 +22,7 @@ public: return "Replace CROSS JOIN with INNER JOIN"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 696483862e0..1b04136e6a4 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -202,7 +202,7 @@ private: } -void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { FunctionToSubcolumnsVisitor visitor(context); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.h b/src/Analyzer/Passes/FunctionToSubcolumnsPass.h index 0e1d2583e7b..d4edcc5b922 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.h +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.h @@ -24,7 +24,7 @@ public: String getDescription() override { return "Rewrite function to subcolumns, for example tupleElement(column, subcolumn) into column.subcolumn"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 14082697955..ef87528964c 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -254,7 +254,7 @@ void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context) } -void FuseFunctionsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void FuseFunctionsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { 
tryFuseSumCountAvg(query_tree_node, context); tryFuseQuantiles(query_tree_node, context); diff --git a/src/Analyzer/Passes/FuseFunctionsPass.h b/src/Analyzer/Passes/FuseFunctionsPass.h index a92b77b1115..2fd85da4747 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.h +++ b/src/Analyzer/Passes/FuseFunctionsPass.h @@ -20,7 +20,7 @@ public: String getDescription() override { return "Replaces several calls of aggregate functions of the same family into one call"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp index 0cf5310a3ad..774014e5ffd 100644 --- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp +++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp @@ -248,7 +248,7 @@ private: } -void GroupingFunctionsResolvePass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void GroupingFunctionsResolvePass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { GroupingFunctionsResolveVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.h b/src/Analyzer/Passes/GroupingFunctionsResolvePass.h index 070c8dd9389..cd932f76977 100644 --- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.h +++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.h @@ -24,7 +24,7 @@ public: String getDescription() override { return "Resolve GROUPING functions based on GROUP BY modifiers"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index 1f97e012331..91a5709f142 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -73,7 +73,7 @@ private: } -void IfChainToMultiIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void IfChainToMultiIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { auto multi_if_function_ptr = FunctionFactory::instance().get("multiIf", context); IfChainToMultiIfPassVisitor visitor(std::move(multi_if_function_ptr), std::move(context)); diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.h b/src/Analyzer/Passes/IfChainToMultiIfPass.h index 43f3fb8831d..9e7335d93e4 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.h +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.h @@ -18,7 +18,7 @@ public: String getDescription() override { return "Optimize if chain to multiIf"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/IfConstantConditionPass.cpp b/src/Analyzer/Passes/IfConstantConditionPass.cpp index 6f9cfe482f1..35c6718f018 100644 --- a/src/Analyzer/Passes/IfConstantConditionPass.cpp +++ b/src/Analyzer/Passes/IfConstantConditionPass.cpp @@ -49,7 +49,7 @@ public: } -void IfConstantConditionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +void IfConstantConditionPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr) { IfConstantConditionVisitor visitor; visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/IfConstantConditionPass.h b/src/Analyzer/Passes/IfConstantConditionPass.h index 7817e67aa5e..7548fc702bc 100644 --- 
a/src/Analyzer/Passes/IfConstantConditionPass.h +++ b/src/Analyzer/Passes/IfConstantConditionPass.h @@ -21,7 +21,7 @@ public: String getDescription() override { return "Optimize if, multiIf for constant condition."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 562aff4cf05..32e3c3cda51 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -205,7 +205,7 @@ public: } -void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr context) +void IfTransformStringsToEnumPass::run(QueryTreeNodePtr & query, ContextPtr context) { ConvertStringsToEnumVisitor visitor(std::move(context)); visitor.visit(query); diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.h b/src/Analyzer/Passes/IfTransformStringsToEnumPass.h index a4a014967e0..522087aafae 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.h +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.h @@ -33,7 +33,7 @@ public: String getDescription() override { return "Replaces string-type arguments in If and Transform to enum"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 13f8025f5ea..7e0b6b2f828 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -233,7 +233,7 @@ private: } }; -void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { LogicalExpressionOptimizerVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index 05c10ddc685..51d9968b48c 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -76,7 +76,7 @@ public: String getDescription() override { return "Transform equality chain to a single IN function or a constant if possible"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index 4672351bcfb..5012aa7fa78 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -43,7 +43,7 @@ private: } -void MultiIfToIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void MultiIfToIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { auto if_function_ptr = FunctionFactory::instance().get("if", context); MultiIfToIfVisitor visitor(std::move(if_function_ptr), std::move(context)); diff --git a/src/Analyzer/Passes/MultiIfToIfPass.h b/src/Analyzer/Passes/MultiIfToIfPass.h index 2213f3713ed..e3c03913aaa 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.h +++ b/src/Analyzer/Passes/MultiIfToIfPass.h @@ -17,7 +17,7 @@ public: String getDescription() override { return "Optimize multiIf 
with single condition to if."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index d36be98751c..20b308c3af6 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -64,7 +64,7 @@ private: } -void NormalizeCountVariantsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void NormalizeCountVariantsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { NormalizeCountVariantsVisitor visitor(context); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.h b/src/Analyzer/Passes/NormalizeCountVariantsPass.h index 78a114f4a85..6cf9f34619a 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.h +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.h @@ -20,7 +20,7 @@ public: String getDescription() override { return "Optimize count(literal), sum(1) into count()."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp index 5ed52f1210b..7c851d5fc35 100644 --- a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp @@ -130,7 +130,7 @@ private: } }; -void OptimizeGroupByFunctionKeysPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void OptimizeGroupByFunctionKeysPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { OptimizeGroupByFunctionKeysVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.h b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.h index 632960c45bb..fd5eadcb796 100644 --- a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.h +++ b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.h @@ -16,7 +16,7 @@ public: String getDescription() override { return "Eliminates functions of other keys in GROUP BY section."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index c6d312d0ecf..b6cc50caffe 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -124,7 +124,7 @@ private: } -void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { OptimizeRedundantFunctionsInOrderByVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h index 609a6360d27..4a63c78022b 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h @@ -17,7 +17,7 @@ public: String getDescription() override { return "If ORDER BY has argument x 
followed by f(x) transforms it to ORDER BY x."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; } diff --git a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp index 3632c41028b..26ca5984b49 100644 --- a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp +++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp @@ -70,7 +70,7 @@ private: } -void OrderByLimitByDuplicateEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +void OrderByLimitByDuplicateEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr) { OrderByLimitByDuplicateEliminationVisitor visitor; visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h index 11a025af5b9..de5e1898a4c 100644 --- a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h +++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h @@ -20,7 +20,7 @@ public: String getDescription() override { return "Remove duplicate columns from ORDER BY, LIMIT BY."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/OrderByTupleEliminationPass.cpp b/src/Analyzer/Passes/OrderByTupleEliminationPass.cpp index f70ec27ba5d..7c106082124 100644 --- a/src/Analyzer/Passes/OrderByTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/OrderByTupleEliminationPass.cpp @@ -50,7 +50,7 @@ public: } -void OrderByTupleEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +void OrderByTupleEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr) { OrderByTupleEliminationVisitor visitor; visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/OrderByTupleEliminationPass.h b/src/Analyzer/Passes/OrderByTupleEliminationPass.h index 5665561e227..45c8a756795 100644 --- a/src/Analyzer/Passes/OrderByTupleEliminationPass.h +++ b/src/Analyzer/Passes/OrderByTupleEliminationPass.h @@ -17,7 +17,7 @@ public: String getDescription() override { return "Remove tuple from ORDER BY."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index c454ad9f84f..1a76bc762a4 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -77,6 +77,8 @@ #include #include #include +#include +#include namespace ProfileEvents { @@ -1056,7 +1058,7 @@ private: class QueryAnalyzer { public: - void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context) + void resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context) { IdentifierResolveScope scope(node, nullptr /*parent_scope*/); @@ -1097,6 +1099,7 @@ public: { if (table_expression) { + LOG_DEBUG(&Poco::Logger::get("resolve"), "Table expression: {}", table_expression->dumpTree()); scope.expression_join_tree_node = table_expression; validateTableExpressionModifiers(scope.expression_join_tree_node, scope); initializeTableExpressionData(scope.expression_join_tree_node, scope); @@ -1106,6 +1109,7 @@ public: resolveExpressionNodeList(node, 
scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); else resolveExpressionNode(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + LOG_DEBUG(&Poco::Logger::get("resolve"), "Result: {}", node->dumpTree()); break; } @@ -2677,6 +2681,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier */ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) { + LOG_DEBUG(&Poco::Logger::get("tryResolveIdentifierFromTableColumns"), "{} {}", scope.column_name_to_column_node.size(), !identifier_lookup.isExpressionLookup()); if (scope.column_name_to_column_node.empty() || !identifier_lookup.isExpressionLookup()) return {}; @@ -2836,11 +2841,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id QueryTreeNodePtr result_expression; bool match_full_identifier = false; + LOG_DEBUG(&Poco::Logger::get("resolve_identifier_from_storage_or_throw"), "Looking for id: {}", identifier_without_column_qualifier.getFullName()); + auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.getFullName()); if (it != table_expression_data.column_name_to_column_node.end()) { match_full_identifier = true; result_expression = it->second; + LOG_DEBUG(&Poco::Logger::get("resolve_identifier_from_storage_or_throw"), "Found: {}", result_expression->dumpTree()); } else { @@ -5389,6 +5397,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id auto unresolved_identifier = identifier_node.getIdentifier(); auto resolve_identifier_expression_result = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::EXPRESSION}, scope); auto resolved_identifier_node = resolve_identifier_expression_result.resolved_identifier; + LOG_DEBUG(&Poco::Logger::get("resolveExpressionNode"), "Resolved: {}", resolved_identifier_node ? resolved_identifier_node->dumpTree() : "Not resolved"); if (resolved_identifier_node && result_projection_names.empty() && (resolve_identifier_expression_result.isResolvedFromJoinTree() || resolve_identifier_expression_result.isResolvedFromExpressionArguments())) @@ -5470,6 +5479,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id } node = std::move(resolved_identifier_node); + LOG_DEBUG(&Poco::Logger::get("resolveExpressionNode"), "Result node: {}", node ? 
node->dumpTree() : "Not resolved"); if (node->getNodeType() == QueryTreeNodeType::LIST) { @@ -6173,6 +6183,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table table_expression_data.should_qualify_columns = false; } + LOG_DEBUG(&Poco::Logger::get("Analyzer"), "Table data: {}", table_expression_data.dump()); scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data)); } @@ -7152,7 +7163,7 @@ QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_) : table_expression(std::move(table_expression_)) {} -void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { QueryAnalyzer analyzer; analyzer.resolve(query_tree_node, table_expression, context); diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h index fa8778ebf76..5d335d3e712 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.h +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -89,7 +89,7 @@ public: return "Resolve type for each query expression. Replace identifiers, matchers with query expressions. Perform constant folding. Evaluate scalar subqueries."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; private: QueryTreeNodePtr table_expression; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index de264948d4c..2fe5a89578b 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -108,7 +108,7 @@ private: } -void RewriteAggregateFunctionWithIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void RewriteAggregateFunctionWithIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { RewriteAggregateFunctionWithIfVisitor visitor(context); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h index be8ad3ac34d..0a2fc1ba423 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h @@ -20,7 +20,7 @@ public: return "Rewrite aggregate functions with if expression as argument when logically equivalent"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp index b28816e8ff3..c273aecc9b5 100644 --- a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp +++ b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp @@ -58,7 +58,7 @@ public: } -void ShardNumColumnToFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void ShardNumColumnToFunctionPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { ShardNumColumnToFunctionVisitor visitor(context); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h index 71a038bcf39..248f4e29bbe 100644 --- a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h +++ b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h @@ -17,7 +17,7 @@ public: String getDescription() 
override { return "Rewrite _shard_num column into shardNum() function"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index d55af278152..04d6c134d10 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -180,7 +180,7 @@ private: } -void SumIfToCountIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void SumIfToCountIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { SumIfToCountIfVisitor visitor(context); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.h b/src/Analyzer/Passes/SumIfToCountIfPass.h index f3ba47f1c2c..439d80c6306 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.h +++ b/src/Analyzer/Passes/SumIfToCountIfPass.h @@ -23,7 +23,7 @@ public: String getDescription() override { return "Rewrite sum(if) and sumIf into countIf"; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 5c4484457e8..e256934010d 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -87,7 +87,7 @@ public: } -void UniqInjectiveFunctionsEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void UniqInjectiveFunctionsEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { UniqInjectiveFunctionsEliminationVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h index a0f07dfb7b5..c143fe2c39c 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h @@ -17,7 +17,7 @@ public: String getDescription() override { return "Remove injective functions from uniq functions arguments."; } - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; }; diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index c64d82299ca..e9fa72f925d 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -494,8 +494,8 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi return visitFunction(node); throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Expected column, constant, function. Actual {}", - node->formatASTForErrorMessage()); + "Expected column, constant, function. 
Actual {} with type: {}", + node->formatASTForErrorMessage(), node_type); } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b91ad0b963a..9f9f0fda9e2 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -30,6 +30,7 @@ #include #include #include +#include "Analyzer/IQueryTreeNode.h" #include #include @@ -937,7 +938,8 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, table_function_node->setTableExpressionModifiers(*table_expression_modifiers); QueryAnalysisPass query_analysis_pass; - query_analysis_pass.run(table_function_node, query_context); + QueryTreeNodePtr node = table_function_node; + query_analysis_pass.run(node, query_context); replacement_table_expression = std::move(table_function_node); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b0ed242d14d..a49155ac2d9 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -27,9 +27,18 @@ #include #include #include +#include "Common/logger_useful.h" #include #include +#include "Analyzer/ColumnNode.h" +#include "Analyzer/IQueryTreeNode.h" +#include "Analyzer/Identifier.h" +#include "Analyzer/IdentifierNode.h" +#include "Analyzer/Passes/QueryAnalysisPass.h" +#include "Analyzer/QueryTreeBuilder.h" +#include "Core/NamesAndTypes.h" #include "DataTypes/IDataType.h" +#include "Planner/PlannerActionsVisitor.h" #include #include #include @@ -42,6 +51,7 @@ #include #include #include +#include namespace @@ -464,8 +474,8 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, context); - auto modified_query_info = getModifiedQueryInfo(query_info, context, table, nested_storage_snaphsot); Names column_names_as_aliases; + auto modified_query_info = getModifiedQueryInfo(context, table, nested_storage_snaphsot, column_names_as_aliases); if (!context->getSettingsRef().allow_experimental_analyzer) { @@ -553,10 +563,10 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu pipeline.addResources(std::move(resources)); } -SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & query_info, - const ContextPtr & modified_context, +SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, - const StorageSnapshotPtr & storage_snapshot) + const StorageSnapshotPtr & storage_snapshot, + Names & column_names_as_aliases) const { const auto & [database_name, storage, storage_lock, table_name] = storage_with_lock_and_name; const StorageID current_storage_id = storage->getStorageID(); @@ -586,6 +596,47 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & quer if (!storage_snapshot->tryGetColumn(get_column_options, "_database")) column_name_to_node.emplace("_database", std::make_shared(current_storage_id.database_name)); + auto storage_columns = storage_snapshot->metadata->getColumns(); + + bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); + if (with_aliases) + { + auto filter_actions_dag = std::make_shared(); + for (const auto & column : column_names) + { + const 
auto column_default = storage_columns.getDefault(column); + bool is_alias = column_default && column_default->kind == ColumnDefaultKind::Alias; + + QueryTreeNodePtr column_node; + + if (is_alias) + { + column_node = buildQueryTree(column_default->expression, modified_context); + + LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT before: {}\n{}", column_node->dumpTree(), modified_query_info.table_expression->dumpTree()); + + column_node->setAlias(column); + + QueryAnalysisPass query_analysis_pass(modified_query_info.table_expression); + query_analysis_pass.run(column_node, modified_context); + + LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT after: {}", column_node->dumpTree()); + + column_name_to_node.emplace(column, column_node); + } + else + { + column_node = std::make_shared(NameAndTypePair{column, storage_columns.getColumn(get_column_options, column).type }, modified_query_info.table_expression); + } + + + PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); + actions_visitor.visit(filter_actions_dag, column_node); + } + column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); + LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "Required names: {}", toString(column_names_as_aliases)); + } + if (!column_name_to_node.empty()) { replaceColumns(modified_query_info.query_tree, @@ -594,6 +645,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & quer } modified_query_info.query = queryNodeToSelectQuery(modified_query_info.query_tree); + LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "Modified query: {}", modified_query_info.query->formatForLogging()); } else { @@ -640,6 +692,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( modified_select.setFinal(); } + LOG_DEBUG(&Poco::Logger::get("createSources"), "real_column_names: {}", toString(real_column_names)); + bool allow_experimental_analyzer = modified_context->getSettingsRef().allow_experimental_analyzer; auto storage_stage = storage->getQueryProcessingStage(modified_context, @@ -783,7 +837,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. 
- convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage); + convertingSourceStream(header, modified_query_info, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage); } return builder; @@ -957,9 +1011,10 @@ void StorageMerge::alter( void ReadFromMerge::convertingSourceStream( const Block & header, + SelectQueryInfo & modified_query_info, const StorageMetadataPtr & metadata_snapshot, const Aliases & aliases, - ContextPtr local_context, + ContextMutablePtr local_context, QueryPipelineBuilder & builder, const QueryProcessingStage::Enum & processed_stage) { @@ -968,21 +1023,49 @@ void ReadFromMerge::convertingSourceStream( auto storage_sample_block = metadata_snapshot->getSampleBlock(); auto pipe_columns = builder.getHeader().getNamesAndTypesList(); - for (const auto & alias : aliases) + if (local_context->getSettingsRef().allow_experimental_analyzer) { - pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - ASTPtr expr = alias.expression; - auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); - auto expression_analyzer = ExpressionAnalyzer{alias.expression, syntax_result, local_context}; - - auto dag = std::make_shared(pipe_columns); - auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - - builder.addSimpleTransform([&](const Block & stream_header) + for (const auto & alias : aliases) { - return std::make_shared(stream_header, actions); - }); + pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); + + auto actions_dag = std::make_shared(); + + QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); + query_tree->setAlias(alias.name); + + QueryAnalysisPass query_analysis_pass(modified_query_info.table_expression); + query_analysis_pass.run(query_tree, local_context); + + PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); + actions_visitor.visit(actions_dag, query_tree); + + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header, actions); + }); + } + } + else + { + for (const auto & alias : aliases) + { + pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); + ASTPtr expr = alias.expression; + auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); + auto expression_analyzer = ExpressionAnalyzer{alias.expression, syntax_result, local_context}; + + auto dag = std::make_shared(pipe_columns); + auto actions_dag = expression_analyzer.getActionsDAG(true, false); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header, actions); + }); + } } ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index babf0dd92e8..739d6831f6f 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -177,10 +177,10 @@ private: using Aliases = std::vector; - static SelectQueryInfo getModifiedQueryInfo(const 
SelectQueryInfo & query_info, - const ContextPtr & modified_context, + SelectQueryInfo getModifiedQueryInfo(const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, - const StorageSnapshotPtr & storage_snapshot); + const StorageSnapshotPtr & storage_snapshot, + Names & column_names_as_aliases) const; QueryPipelineBuilderPtr createSources( const StorageSnapshotPtr & storage_snapshot, @@ -197,9 +197,10 @@ private: static void convertingSourceStream( const Block & header, + SelectQueryInfo & modified_query_info, const StorageMetadataPtr & metadata_snapshot, const Aliases & aliases, - ContextPtr context, + ContextMutablePtr context, QueryPipelineBuilder & builder, const QueryProcessingStage::Enum & processed_stage); }; From fc9ee3eb4e1e4c4b145bc39bc7ce507cf05b9d1d Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 15:01:31 +0000 Subject: [PATCH 002/884] Correctly build the ActionsDAG --- src/Storages/StorageMerge.cpp | 28 +++++++++++++++++++++------- src/Storages/StorageMerge.h | 3 ++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index a49155ac2d9..d036eaa9f25 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -475,7 +475,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, context); Names column_names_as_aliases; - auto modified_query_info = getModifiedQueryInfo(context, table, nested_storage_snaphsot, column_names_as_aliases); + auto modified_query_info = getModifiedQueryInfo(context, table, nested_storage_snaphsot, column_names_as_aliases, aliases); if (!context->getSettingsRef().allow_experimental_analyzer) { @@ -566,7 +566,8 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, const StorageSnapshotPtr & storage_snapshot, - Names & column_names_as_aliases) const + Names & column_names_as_aliases, + Aliases & aliases) const { const auto & [database_name, storage, storage_lock, table_name] = storage_with_lock_and_name; const StorageID current_storage_id = storage->getStorageID(); @@ -611,18 +612,23 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (is_alias) { - column_node = buildQueryTree(column_default->expression, modified_context); + // column_node = buildQueryTree(column_default->expression, modified_context); + column_node = std::make_shared(Identifier{column}); LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT before: {}\n{}", column_node->dumpTree(), modified_query_info.table_expression->dumpTree()); - column_node->setAlias(column); - QueryAnalysisPass query_analysis_pass(modified_query_info.table_expression); query_analysis_pass.run(column_node, modified_context); LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT after: {}", column_node->dumpTree()); + auto * resolved_column = column_node->as(); + if (!resolved_column || !resolved_column->getExpression()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Alias column is not resolved"); + + column_node = resolved_column->getExpression(); column_name_to_node.emplace(column, column_node); + aliases.push_back({ .name = column, .type = resolved_column->getResultType(), .expression = column_node->toAST() }); } else { @@ -634,6 +640,9 
@@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ actions_visitor.visit(filter_actions_dag, column_node); } column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); + if (column_names_as_aliases.empty()) + column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "Required names: {}", toString(column_names_as_aliases)); } @@ -1029,7 +1038,7 @@ void ReadFromMerge::convertingSourceStream( { pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - auto actions_dag = std::make_shared(); + auto actions_dag = std::make_shared(pipe_columns); QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); query_tree->setAlias(alias.name); @@ -1038,7 +1047,12 @@ void ReadFromMerge::convertingSourceStream( query_analysis_pass.run(query_tree, local_context); PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - actions_visitor.visit(actions_dag, query_tree); + const auto & nodes = actions_visitor.visit(actions_dag, query_tree); + + if (nodes.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); + + actions_dag->addOrReplaceInOutputs(actions_dag->addAlias(*nodes.front(), alias.name)); auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 739d6831f6f..987869e5de3 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -180,7 +180,8 @@ private: SelectQueryInfo getModifiedQueryInfo(const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, const StorageSnapshotPtr & storage_snapshot, - Names & column_names_as_aliases) const; + Names & column_names_as_aliases, + Aliases & aliases) const; QueryPipelineBuilderPtr createSources( const StorageSnapshotPtr & storage_snapshot, From 55b81a5a5e7ad73a3e53aee0d0b83731ff8e76ed Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 23:13:18 +0000 Subject: [PATCH 003/884] Fix style --- src/Storages/StorageMerge.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index d036eaa9f25..e2a27d4e20e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -80,6 +80,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_PREWHERE; From 6489922dc19a0fda86bdcc8e08c108812dc4aebf Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 16 Jun 2023 18:49:59 +0000 Subject: [PATCH 004/884] Fix for column aliases that use other aliases --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 9 ------ src/Storages/StorageMerge.cpp | 38 ++++++++++++++++++++--- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 1a76bc762a4..309f067c4c0 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1099,7 +1099,6 @@ public: { if (table_expression) { - LOG_DEBUG(&Poco::Logger::get("resolve"), "Table expression: {}", table_expression->dumpTree()); scope.expression_join_tree_node = table_expression; 
validateTableExpressionModifiers(scope.expression_join_tree_node, scope); initializeTableExpressionData(scope.expression_join_tree_node, scope); @@ -1109,7 +1108,6 @@ public: resolveExpressionNodeList(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); else resolveExpressionNode(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - LOG_DEBUG(&Poco::Logger::get("resolve"), "Result: {}", node->dumpTree()); break; } @@ -2681,7 +2679,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier */ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) { - LOG_DEBUG(&Poco::Logger::get("tryResolveIdentifierFromTableColumns"), "{} {}", scope.column_name_to_column_node.size(), !identifier_lookup.isExpressionLookup()); if (scope.column_name_to_column_node.empty() || !identifier_lookup.isExpressionLookup()) return {}; @@ -2841,14 +2838,11 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id QueryTreeNodePtr result_expression; bool match_full_identifier = false; - LOG_DEBUG(&Poco::Logger::get("resolve_identifier_from_storage_or_throw"), "Looking for id: {}", identifier_without_column_qualifier.getFullName()); - auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.getFullName()); if (it != table_expression_data.column_name_to_column_node.end()) { match_full_identifier = true; result_expression = it->second; - LOG_DEBUG(&Poco::Logger::get("resolve_identifier_from_storage_or_throw"), "Found: {}", result_expression->dumpTree()); } else { @@ -5397,7 +5391,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id auto unresolved_identifier = identifier_node.getIdentifier(); auto resolve_identifier_expression_result = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::EXPRESSION}, scope); auto resolved_identifier_node = resolve_identifier_expression_result.resolved_identifier; - LOG_DEBUG(&Poco::Logger::get("resolveExpressionNode"), "Resolved: {}", resolved_identifier_node ? resolved_identifier_node->dumpTree() : "Not resolved"); if (resolved_identifier_node && result_projection_names.empty() && (resolve_identifier_expression_result.isResolvedFromJoinTree() || resolve_identifier_expression_result.isResolvedFromExpressionArguments())) @@ -5479,7 +5472,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id } node = std::move(resolved_identifier_node); - LOG_DEBUG(&Poco::Logger::get("resolveExpressionNode"), "Result node: {}", node ? 
node->dumpTree() : "Not resolved"); if (node->getNodeType() == QueryTreeNodeType::LIST) { @@ -6183,7 +6175,6 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table table_expression_data.should_qualify_columns = false; } - LOG_DEBUG(&Poco::Logger::get("Analyzer"), "Table data: {}", table_expression_data.dump()); scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data)); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e2a27d4e20e..13548a84826 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -34,6 +34,7 @@ #include "Analyzer/IQueryTreeNode.h" #include "Analyzer/Identifier.h" #include "Analyzer/IdentifierNode.h" +#include "Analyzer/InDepthQueryTreeVisitor.h" #include "Analyzer/Passes/QueryAnalysisPass.h" #include "Analyzer/QueryTreeBuilder.h" #include "Core/NamesAndTypes.h" @@ -564,6 +565,26 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu pipeline.addResources(std::move(resources)); } +namespace +{ + +class ApplyAliasColumnExpressionsVisitor : public InDepthQueryTreeVisitor +{ +public: + ApplyAliasColumnExpressionsVisitor() = default; + + void visitImpl(QueryTreeNodePtr & node) + { + if (auto * column = node->as(); + column != nullptr && column->hasExpression()) + { + node = column->getExpressionOrThrow(); + } + } +}; + +} + SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, const StorageSnapshotPtr & storage_snapshot, @@ -611,23 +632,28 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ QueryTreeNodePtr column_node; + if (is_alias) { // column_node = buildQueryTree(column_default->expression, modified_context); - column_node = std::make_shared(Identifier{column}); + QueryTreeNodePtr fake_node = std::make_shared(Identifier{column}); - LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT before: {}\n{}", column_node->dumpTree(), modified_query_info.table_expression->dumpTree()); + LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT before: {}\n{}", fake_node->dumpTree(), modified_query_info.table_expression->dumpTree()); QueryAnalysisPass query_analysis_pass(modified_query_info.table_expression); - query_analysis_pass.run(column_node, modified_context); + query_analysis_pass.run(fake_node, modified_context); + + auto * resolved_column = fake_node->as(); + + column_node = fake_node; + ApplyAliasColumnExpressionsVisitor visitor; + visitor.visit(column_node); LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT after: {}", column_node->dumpTree()); - auto * resolved_column = column_node->as(); if (!resolved_column || !resolved_column->getExpression()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Alias column is not resolved"); - column_node = resolved_column->getExpression(); column_name_to_node.emplace(column, column_node); aliases.push_back({ .name = column, .type = resolved_column->getResultType(), .expression = column_node->toAST() }); } @@ -1095,6 +1121,8 @@ void ReadFromMerge::convertingSourceStream( std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + LOG_DEBUG(&Poco::Logger::get("convertingSourceStream"), "The header: {}", builder.getHeader().dumpStructure()); + builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, actions); From 
f9e67fe0427ee2d698d2b946a8286e228d47b0ec Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 19 Jun 2023 15:10:29 +0000 Subject: [PATCH 005/884] Update broken_tests.txt --- tests/broken_tests.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index d49b4f391e5..1635c8740cc 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -24,7 +24,6 @@ 01173_transaction_control_queries 01211_optimize_skip_unused_shards_type_mismatch 01213_optimize_skip_unused_shards_DISTINCT -01214_test_storage_merge_aliases_with_where 01231_distributed_aggregation_memory_efficient_mix_levels 01244_optimize_distributed_group_by_sharding_key 01247_optimize_distributed_group_by_sharding_key_dist_on_dist @@ -68,7 +67,6 @@ 01890_materialized_distributed_join 01901_in_literal_shard_prune 01925_join_materialized_columns -01925_test_storage_merge_aliases 01930_optimize_skip_unused_shards_rewrite_in 01947_mv_subquery 01951_distributed_push_down_limit From dcdadd5f639def096bd330f987609d0c5740ca83 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 19 Jun 2023 15:18:04 +0000 Subject: [PATCH 006/884] Update broken_tests.txt --- tests/broken_tests.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index 1635c8740cc..8b11c5f5413 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -99,7 +99,6 @@ 02494_optimize_group_by_function_keys_and_alias_columns 02521_aggregation_by_partitions 02554_fix_grouping_sets_predicate_push_down -02575_merge_prewhere_different_default_kind 02713_array_low_cardinality_string 02707_skip_index_with_in 02241_join_rocksdb_bs From 20c752fb787a05f9180f791401afe56bf372acfc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 19 Jun 2023 15:44:01 +0000 Subject: [PATCH 007/884] Fix generated query --- src/Storages/StorageMerge.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 13548a84826..22308c1d901 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -614,7 +614,11 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ std::unordered_map column_name_to_node; if (!storage_snapshot->tryGetColumn(get_column_options, "_table")) - column_name_to_node.emplace("_table", std::make_shared(current_storage_id.table_name)); + { + auto table_name_node = std::make_shared(current_storage_id.table_name); + table_name_node->setAlias("_table"); + column_name_to_node.emplace("_table", table_name_node); + } if (!storage_snapshot->tryGetColumn(get_column_options, "_database")) column_name_to_node.emplace("_database", std::make_shared(current_storage_id.database_name)); From 118b84703bb0f08aa622b956b1207d9092f5f2d7 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 21 Jun 2023 01:51:34 +0200 Subject: [PATCH 008/884] WIP on StorageMerge and distributed JOIN --- src/Analyzer/ColumnNode.h | 5 ++ src/Storages/StorageMerge.cpp | 86 ++++++++++++++++++++++++++++++++--- src/Storages/StorageMerge.h | 2 +- 3 files changed, 86 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/ColumnNode.h b/src/Analyzer/ColumnNode.h index b320df788c5..46e7c8eb500 100644 --- a/src/Analyzer/ColumnNode.h +++ b/src/Analyzer/ColumnNode.h @@ -108,6 +108,11 @@ public: */ QueryTreeNodePtr getColumnSourceOrNull() const; + void setColumnSource(const QueryTreeNodePtr & source) + { + getSourceWeakPointer() = source; + } + QueryTreeNodeType getNodeType() const override { 
return QueryTreeNodeType::COLUMN; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 22308c1d901..85ec21b4765 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -51,6 +52,7 @@ #include #include #include +#include #include #include @@ -583,6 +585,76 @@ public: } }; +bool hasUnknownColumn(const QueryTreeNodePtr & node, + QueryTreeNodePtr original_table_expression, + QueryTreeNodePtr replacement_table_expression) +{ + QueryTreeNodes stack = { node }; + while (!stack.empty()) + { + auto current = stack.back(); + stack.pop_back(); + + switch (current->getNodeType()) + { + case QueryTreeNodeType::CONSTANT: + break; + case QueryTreeNodeType::COLUMN: + { + auto * column_node = current->as(); + auto source = column_node->getColumnSourceOrNull(); + if (source != original_table_expression) + return true; + else + column_node->setColumnSource(replacement_table_expression); + break; + } + default: + { + for (const auto & child : node->getChildren()) + { + if (child) + stack.push_back(child); + } + } + } + } + return false; +} + +QueryTreeNodePtr removeJoin( + QueryTreeNodePtr query, + QueryTreeNodePtr original_table_expression, + QueryTreeNodePtr replacement_table_expression) +{ + auto * query_node = query->as(); + auto modified_query = query_node->cloneAndReplace(query_node->getJoinTree(), replacement_table_expression); + + query_node = modified_query->as(); + query_node->getGroupBy().getNodes().clear(); + query_node->getHaving() = {}; + query_node->getOrderBy().getNodes().clear(); + + auto & projection = query_node->getProjection().getNodes(); + auto projection_columns = query_node->getProjectionColumns(); + for (size_t i = 0; i < projection.size();) + { + if (hasUnknownColumn(projection[i], original_table_expression, replacement_table_expression)) + { + projection.erase(projection.begin() + i); + projection_columns.erase(projection_columns.begin() + i); + continue; + } + ++i; + } + + query_node->resolveProjectionColumns(std::move(projection_columns)); + + LOG_DEBUG(&Poco::Logger::get("removeJoin"), "Query without JOIN:\n{}", modified_query->dumpTree()); + + return modified_query; +} + } SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, @@ -602,8 +674,9 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (query_info.table_expression_modifiers) replacement_table_expression->setTableExpressionModifiers(*query_info.table_expression_modifiers); - modified_query_info.query_tree = modified_query_info.query_tree->cloneAndReplace(modified_query_info.table_expression, - replacement_table_expression); + modified_query_info.query_tree = removeJoin(modified_query_info.query_tree, modified_query_info.table_expression, replacement_table_expression); + // modified_query_info.query_tree = modified_query_info.query_tree->cloneAndReplace(modified_query_info.table_expression, + // replacement_table_expression); modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); @@ -877,7 +950,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. 
- convertingSourceStream(header, modified_query_info, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage); + convertingSourceStream(header, modified_query_info, storage_snapshot, aliases, modified_context, *builder, processed_stage); } return builder; @@ -1052,7 +1125,7 @@ void StorageMerge::alter( void ReadFromMerge::convertingSourceStream( const Block & header, SelectQueryInfo & modified_query_info, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & snapshot, const Aliases & aliases, ContextMutablePtr local_context, QueryPipelineBuilder & builder, @@ -1060,7 +1133,7 @@ void ReadFromMerge::convertingSourceStream( { Block before_block_header = builder.getHeader(); - auto storage_sample_block = metadata_snapshot->getSampleBlock(); + auto storage_sample_block = snapshot->metadata->getSampleBlock(); auto pipe_columns = builder.getHeader().getNamesAndTypesList(); if (local_context->getSettingsRef().allow_experimental_analyzer) @@ -1115,7 +1188,8 @@ void ReadFromMerge::convertingSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; - if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns) + if (local_context->getSettingsRef().allow_experimental_analyzer + && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 987869e5de3..de9480292f9 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -199,7 +199,7 @@ private: static void convertingSourceStream( const Block & header, SelectQueryInfo & modified_query_info, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & snapshot, const Aliases & aliases, ContextMutablePtr context, QueryPipelineBuilder & builder, From 88fe30254a280286ac2bd2b6bcdc71865ec2aed2 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 21 Jun 2023 17:55:14 +0000 Subject: [PATCH 009/884] Small fixup --- src/Storages/StorageMerge.cpp | 12 +++++++++--- tests/broken_tests.txt | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 85ec21b4765..d1ac3f57ae1 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -631,6 +631,10 @@ QueryTreeNodePtr removeJoin( auto modified_query = query_node->cloneAndReplace(query_node->getJoinTree(), replacement_table_expression); query_node = modified_query->as(); + + //TODO: change the predicates to make it valid and execute it on shards. 
+ query_node->getPrewhere() = {}; + query_node->getWhere() = {}; query_node->getGroupBy().getNodes().clear(); query_node->getHaving() = {}; query_node->getOrderBy().getNodes().clear(); @@ -675,8 +679,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ replacement_table_expression->setTableExpressionModifiers(*query_info.table_expression_modifiers); modified_query_info.query_tree = removeJoin(modified_query_info.query_tree, modified_query_info.table_expression, replacement_table_expression); - // modified_query_info.query_tree = modified_query_info.query_tree->cloneAndReplace(modified_query_info.table_expression, - // replacement_table_expression); modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); @@ -694,7 +696,11 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ } if (!storage_snapshot->tryGetColumn(get_column_options, "_database")) - column_name_to_node.emplace("_database", std::make_shared(current_storage_id.database_name)); + { + auto database_name_node = std::make_shared(current_storage_id.database_name); + database_name_node->setAlias("_database"); + column_name_to_node.emplace("_database", database_name_node); + } auto storage_columns = storage_snapshot->metadata->getColumns(); diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index e6b5fb4f631..f6e21a29eed 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -38,7 +38,6 @@ 01527_dist_sharding_key_dictGet_reload 01528_allow_nondeterministic_optimize_skip_unused_shards 01540_verbatim_partition_pruning -01560_merge_distributed_join 01563_distributed_query_finish 01576_alias_column_rewrite 01583_const_column_in_set_index From 47fafdc32c320464bbd65468208bbc8e5b7ac62f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 21 Jun 2023 18:06:24 +0000 Subject: [PATCH 010/884] Code cleanup --- src/Storages/StorageDistributed.cpp | 1 - src/Storages/StorageMerge.cpp | 35 ++++++++--------------------- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 9f9f0fda9e2..b948ca946c3 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -30,7 +30,6 @@ #include #include #include -#include "Analyzer/IQueryTreeNode.h" #include #include diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index d1ac3f57ae1..1a0376edbf5 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -28,19 +28,17 @@ #include #include #include -#include "Common/logger_useful.h" #include #include -#include "Analyzer/ColumnNode.h" -#include "Analyzer/IQueryTreeNode.h" -#include "Analyzer/Identifier.h" -#include "Analyzer/IdentifierNode.h" -#include "Analyzer/InDepthQueryTreeVisitor.h" -#include "Analyzer/Passes/QueryAnalysisPass.h" -#include "Analyzer/QueryTreeBuilder.h" -#include "Core/NamesAndTypes.h" -#include "DataTypes/IDataType.h" -#include "Planner/PlannerActionsVisitor.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -52,7 +50,6 @@ #include #include #include -#include #include #include @@ -654,8 +651,6 @@ QueryTreeNodePtr removeJoin( query_node->resolveProjectionColumns(std::move(projection_columns)); - LOG_DEBUG(&Poco::Logger::get("removeJoin"), "Query without JOIN:\n{}", modified_query->dumpTree()); - return 
modified_query; } @@ -718,11 +713,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (is_alias) { - // column_node = buildQueryTree(column_default->expression, modified_context); QueryTreeNodePtr fake_node = std::make_shared(Identifier{column}); - LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT before: {}\n{}", fake_node->dumpTree(), modified_query_info.table_expression->dumpTree()); - QueryAnalysisPass query_analysis_pass(modified_query_info.table_expression); query_analysis_pass.run(fake_node, modified_context); @@ -732,8 +724,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ ApplyAliasColumnExpressionsVisitor visitor; visitor.visit(column_node); - LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "QT after: {}", column_node->dumpTree()); - if (!resolved_column || !resolved_column->getExpression()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Alias column is not resolved"); @@ -752,8 +742,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); if (column_names_as_aliases.empty()) column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "Required names: {}", toString(column_names_as_aliases)); } if (!column_name_to_node.empty()) @@ -764,7 +752,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ } modified_query_info.query = queryNodeToSelectQuery(modified_query_info.query_tree); - LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "Modified query: {}", modified_query_info.query->formatForLogging()); } else { @@ -811,8 +798,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( modified_select.setFinal(); } - LOG_DEBUG(&Poco::Logger::get("createSources"), "real_column_names: {}", toString(real_column_names)); - bool allow_experimental_analyzer = modified_context->getSettingsRef().allow_experimental_analyzer; auto storage_stage = storage->getQueryProcessingStage(modified_context, @@ -1205,8 +1190,6 @@ void ReadFromMerge::convertingSourceStream( std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - LOG_DEBUG(&Poco::Logger::get("convertingSourceStream"), "The header: {}", builder.getHeader().dumpStructure()); - builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, actions); From 97a1ea01badaba10235ab0b01777f324b2f8365e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 22 Jun 2023 15:10:53 +0000 Subject: [PATCH 011/884] Fix removeJoin --- src/Storages/StorageMerge.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 1a0376edbf5..fd7c0aae479 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -625,7 +626,8 @@ QueryTreeNodePtr removeJoin( QueryTreeNodePtr replacement_table_expression) { auto * query_node = query->as(); - auto modified_query = query_node->cloneAndReplace(query_node->getJoinTree(), replacement_table_expression); + auto join_tree = query_node->getJoinTree(); + auto modified_query = query_node->cloneAndReplace(join_tree, replacement_table_expression); query_node = modified_query->as(); 
@@ -636,20 +638,23 @@ QueryTreeNodePtr removeJoin( query_node->getHaving() = {}; query_node->getOrderBy().getNodes().clear(); - auto & projection = query_node->getProjection().getNodes(); - auto projection_columns = query_node->getProjectionColumns(); - for (size_t i = 0; i < projection.size();) + if (join_tree->as() == nullptr && join_tree->as() == nullptr) { - if (hasUnknownColumn(projection[i], original_table_expression, replacement_table_expression)) + auto & projection = query_node->getProjection().getNodes(); + auto projection_columns = query_node->getProjectionColumns(); + for (size_t i = 0; i < projection.size();) { - projection.erase(projection.begin() + i); - projection_columns.erase(projection_columns.begin() + i); - continue; + if (hasUnknownColumn(projection[i], original_table_expression, replacement_table_expression)) + { + projection.erase(projection.begin() + i); + projection_columns.erase(projection_columns.begin() + i); + continue; + } + ++i; } - ++i; - } - query_node->resolveProjectionColumns(std::move(projection_columns)); + query_node->resolveProjectionColumns(std::move(projection_columns)); + } return modified_query; } From 83022b77714a204ef4025d0b5081fbc127f2a586 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 2 Sep 2023 21:56:36 +0200 Subject: [PATCH 012/884] Added support for parameterized view with analyzer by analyzing the select part with default values --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 47 +++++++++++++++++++++ src/Analyzer/TableFunctionNode.cpp | 7 +++ src/Analyzer/TableFunctionNode.h | 3 ++ src/Interpreters/InterpreterCreateQuery.cpp | 38 +++++++++++++++-- 4 files changed, 92 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 348189854e8..c82d3079118 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -77,6 +77,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include namespace ProfileEvents { @@ -6210,8 +6216,49 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, QueryExpressionsAliasVisitor & expressions_visitor, bool nested_table_function) { + + String database_name = scope.context->getCurrentDatabase(); + String table_name = table_function_node->getOriginalAST()->as()->name; + + if (table_function_node->getOriginalAST()->as()->is_compound_name) + { + std::vector parts; + splitInto<'.'>(parts, table_function_node->getOriginalAST()->as()->name); + + if (parts.size() == 2) + { + database_name = parts[0]; + table_name = parts[1]; + } + } + auto & table_function_node_typed = table_function_node->as(); + StoragePtr table = DatabaseCatalog::instance().tryGetTable({database_name, table_name}, scope.context->getQueryContext()); + if (table) + { + if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) + { + auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_function_node->getOriginalAST()); + StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + + ASTCreateQuery create; + create.select = query->as(); + auto sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(query, scope.context); + auto res = std::make_shared(StorageID(database_name, table_name), + create, + ColumnsDescription(sample_block.getNamesAndTypesList()), + /* comment */ "", + /* is_parameterized_view 
*/ true); + res->startup(); + table_function_node->getOriginalAST()->as()->prefer_subquery_to_function_formatting = true; + table_function_node_typed.resolve(std::move(res), scope.context); + return; + } + } + + if (!nested_table_function) expressions_visitor.visit(table_function_node_typed.getArgumentsNode()); diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp index e5158a06373..f4ffe7f4ee5 100644 --- a/src/Analyzer/TableFunctionNode.cpp +++ b/src/Analyzer/TableFunctionNode.cpp @@ -36,6 +36,13 @@ void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePt unresolved_arguments_indexes = std::move(unresolved_arguments_indexes_); } +void TableFunctionNode::resolve(StoragePtr storage_value, ContextPtr context) +{ + storage = std::move(storage_value); + storage_id = storage->getStorageID(); + storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); +} + const StorageID & TableFunctionNode::getStorageID() const { if (!storage) diff --git a/src/Analyzer/TableFunctionNode.h b/src/Analyzer/TableFunctionNode.h index 69237ac8416..ed1a26c4dd4 100644 --- a/src/Analyzer/TableFunctionNode.h +++ b/src/Analyzer/TableFunctionNode.h @@ -100,6 +100,9 @@ public: /// Resolve table function with table function, storage and context void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector unresolved_arguments_indexes_); + /// Resolve table function as parameterized view with storage and context + void resolve(StoragePtr storage_value, ContextPtr context); + /// Get storage id, throws exception if function node is not resolved const StorageID & getStorageID() const; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 92d74f4f18a..58b6722aae9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -76,7 +76,8 @@ #include #include - +#include +#include namespace DB { @@ -745,12 +746,43 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { - Block as_select_sample; if (getContext()->getSettingsRef().allow_experimental_analyzer) { - as_select_sample = InterpreterSelectQueryAnalyzer::getSampleBlock(create.select->clone(), getContext()); + if (create.isParameterizedView()) + { + auto select = create.select->clone(); + + ///Get all query parameters + const auto parameters = analyzeReceiveQueryParamsWithType(select); + NameToNameMap parameter_values; + + for (const auto & parameter : parameters) + { + const auto data_type = DataTypeFactory::instance().get(parameter.second); + /// Todo improve getting default values & include more datatypes + if (data_type->isValueRepresentedByNumber() || parameter.second == "String") + parameter_values[parameter.first] = "1"; + else if (parameter.second.starts_with("Array") || parameter.second.starts_with("Map")) + parameter_values[parameter.first] = "[]"; + else + parameter_values[parameter.first] = " "; + LOG_INFO(&Poco::Logger::get("InterpreterCreateQuery"), "parameter = {} = {} ", parameter.first, parameter_values[parameter.first]); + + } + + /// Replace with default parameters + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(select); + + as_select_sample = InterpreterSelectQueryAnalyzer::getSampleBlock(select, getContext()); + } + else + { + as_select_sample = InterpreterSelectQueryAnalyzer::getSampleBlock(create.select->clone(), getContext()); + } + } 
else { From 2dfda84da0e16c594df7df4eb2b05ee1baba1193 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 2 Sep 2023 21:57:57 +0200 Subject: [PATCH 013/884] Removed parameterized view tests from analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 15d46403da9..5521234495f 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -88,7 +88,6 @@ 02402_merge_engine_with_view 02404_memory_bound_merging 02426_orc_bug -02428_parameterized_view 02458_use_structure_from_insertion_table 02479_race_condition_between_insert_and_droppin_mv 02493_inconsistent_hex_and_binary_number @@ -123,7 +122,6 @@ 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long 00992_system_parts_race_condition_zookeeper_long -02818_parameterized_view_with_cte_multiple_usage 02790_optimize_skip_unused_shards_join 01940_custom_tld_sharding_key 02815_range_dict_no_direct_join From 59195e1199d5c8ed31f4243b58f3186771219295 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 4 Sep 2023 19:03:23 +0200 Subject: [PATCH 014/884] Removed log for each parameter --- src/Interpreters/InterpreterCreateQuery.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 58b6722aae9..66c219dcd56 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -768,8 +768,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti parameter_values[parameter.first] = "[]"; else parameter_values[parameter.first] = " "; - LOG_INFO(&Poco::Logger::get("InterpreterCreateQuery"), "parameter = {} = {} ", parameter.first, parameter_values[parameter.first]); - } /// Replace with default parameters From 961bf074daf0c901a3e9d14b6caa4ba6cb37cc7c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 20 Nov 2023 10:56:10 +0100 Subject: [PATCH 015/884] Initial draft version of adding backup support to AzureBlobStorage --- src/Backups/BackupFactory.cpp | 2 + src/Backups/BackupIO_AzureBlobStorage.cpp | 336 ++++++++++++++++++ src/Backups/BackupIO_AzureBlobStorage.h | 69 ++++ src/Backups/BackupImpl.cpp | 8 +- .../registerBackupEngineAzureBlobStorage.cpp | 134 +++++++ src/CMakeLists.txt | 3 + src/Common/ProfileEvents.cpp | 4 + .../copyAzureBlobStorageFile.cpp | 324 +++++++++++++++++ .../copyAzureBlobStorageFile.h | 58 +++ src/Storages/StorageAzureBlob.cpp | 11 + src/Storages/StorageAzureBlob.h | 1 + .../__init__.py | 1 + .../configs/config.xml | 11 + .../configs/disable_profilers.xml | 13 + .../configs/users.xml | 8 + .../test.py | 151 ++++++++ 16 files changed, 1132 insertions(+), 2 deletions(-) create mode 100644 src/Backups/BackupIO_AzureBlobStorage.cpp create mode 100644 src/Backups/BackupIO_AzureBlobStorage.h create mode 100644 src/Backups/registerBackupEngineAzureBlobStorage.cpp create mode 100644 src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp create mode 100644 src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h create mode 100644 tests/integration/test_backup_restore_azure_blob_storage/__init__.py create mode 100644 tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml create mode 100644 tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml create mode 100644 tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml 
create mode 100644 tests/integration/test_backup_restore_azure_blob_storage/test.py diff --git a/src/Backups/BackupFactory.cpp b/src/Backups/BackupFactory.cpp index 898ac7bc490..31e87a21fc2 100644 --- a/src/Backups/BackupFactory.cpp +++ b/src/Backups/BackupFactory.cpp @@ -33,11 +33,13 @@ void BackupFactory::registerBackupEngine(const String & engine_name, const Creat void registerBackupEnginesFileAndDisk(BackupFactory &); void registerBackupEngineS3(BackupFactory &); +void registerBackupEngineAzureBlobStorage(BackupFactory &); void registerBackupEngines(BackupFactory & factory) { registerBackupEnginesFileAndDisk(factory); registerBackupEngineS3(factory); + registerBackupEngineAzureBlobStorage(factory); } BackupFactory::BackupFactory() diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp new file mode 100644 index 00000000000..d41d23e3c36 --- /dev/null +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -0,0 +1,336 @@ +#include + +#if USE_AZURE_BLOB_STORAGE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + + +namespace fs = std::filesystem; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int AZURE_BLOB_STORAGE_ERROR; + extern const int LOGICAL_ERROR; +} + +//using AzureClientPtr = std::shared_ptr; + +BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( + StorageAzureBlob::Configuration configuration_, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_) + : BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderAzureBlobStorage")) + , data_source_description{DataSourceType::AzureBlobStorage, "AzureBlobStorage", false, false} + , configuration(configuration_) +{ + client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + settings = StorageAzureBlob::createSettingsAsSharedPtr(context_); + auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); + object_storage = std::make_unique("BackupReaderAzureBlobStorage", + std::make_unique(*client.get()), + std::move(settings_as_unique_ptr)); +} + +BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; + +bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) +{ + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + return object_storage->exists(StoredObject(key)); +} + +UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) +{ + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); + return object_metadata.size_bytes; +} + +std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) +{ + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + return std::make_unique( + client, key, read_settings, settings->max_single_read_retries, + settings->max_single_download_retries); +} + +void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup, + DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) +{ + LOG_INFO(&Poco::Logger::get("BackupReaderAzureBlobStorage"), "Enter copyFileToDisk"); + + /// Use the native copy as a 
more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. + /// We don't check for `has_throttling` here because the native copy almost doesn't use network. + auto destination_data_source_description = destination_disk->getDataSourceDescription(); + if (destination_data_source_description.sameKind(data_source_description) + && (destination_data_source_description.is_encrypted == encrypted_in_backup)) + { + LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); + auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional & object_attributes) -> size_t + { + /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. + if (blob_path.size() != 2 || mode != WriteMode::Rewrite) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Blob writing function called with unexpected blob_path.size={} or mode={}", + blob_path.size(), mode); + + std::shared_ptr dest_client; + if (configuration.container == blob_path[1]) + { + dest_client = client; + } + else + { + StorageAzureBlob::Configuration dest_configuration = configuration; + dest_configuration.container = blob_path[1]; + dest_configuration.blob_path = blob_path[0]; + dest_client = StorageAzureBlob::createClient(dest_configuration, /* is_read_only */ false); + } + + + copyAzureBlobStorageFile( + client, + dest_client, + configuration.container, + fs::path(configuration.blob_path) / path_in_backup, + 0, + file_size, + /* dest_bucket= */ blob_path[1], + /* dest_key= */ blob_path[0], + settings, + read_settings, + object_attributes, + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupReaderAzureBlobStorage"), + /* for_disk_azure_blob_storage= */ true); + + return file_size; + }; + + destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function); + return; /// copied! + } + + /// Fallback to copy through buffers. + BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode); +} + + +BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( + StorageAzureBlob::Configuration configuration_, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_) + : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterAzureBlobStorage")) + , data_source_description{DataSourceType::AzureBlobStorage, "AzureBlobStorage", false, false} + , configuration(configuration_) +{ + client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + settings = StorageAzureBlob::createSettingsAsSharedPtr(context_); + auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); + object_storage = std::make_unique("BackupWriterAzureBlobStorage", + std::make_unique(*client.get()), + std::move(settings_as_unique_ptr)); +} + +void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, + bool copy_encrypted, UInt64 start_pos, UInt64 length) +{ + /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. 
+ auto source_data_source_description = src_disk->getDataSourceDescription(); + if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) + { + /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage bucket. + /// In this case we can't use the native copy. + if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) + { + + std::shared_ptr src_client; + if (configuration.container == blob_path[1]) + { + src_client = client; + } + else + { + StorageAzureBlob::Configuration src_configuration = configuration; + src_configuration.container = blob_path[1]; + src_configuration.blob_path = blob_path[0]; + src_client = StorageAzureBlob::createClient(src_configuration, /* is_read_only */ false); + } + + LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); + copyAzureBlobStorageFile( + src_client, + client, + /* src_bucket */ blob_path[1], + /* src_key= */ blob_path[0], + start_pos, + length, + configuration.container, + fs::path(configuration.blob_path) / path_in_backup, + settings, + read_settings, + {}, + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterAzureBlobStorage")); + return; /// copied! + } + } + + /// Fallback to copy through buffers. + BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length); +} + +void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) +{ + copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, {}, + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterAzureBlobStorage")); +} + +BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; + +bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) +{ + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + LOG_INFO(&Poco::Logger::get("BackupWriterAzureBlobStorage"), "Result fileExists {} ", object_storage->exists(StoredObject(key))); + + return object_storage->exists(StoredObject(key)); +} + +UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) +{ + LOG_INFO(&Poco::Logger::get("BackupWriterAzureBlobStorage"), "Enter getFileSize"); + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + RelativePathsWithMetadata children; + object_storage->listObjects(key,children,/*max_keys*/0); + if (children.empty()) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object {} must exist"); + return children[0].metadata.size_bytes; +} + +std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) +{ + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + + return std::make_unique( + client, key, read_settings, settings->max_single_read_retries, + settings->max_single_download_retries); +} + +std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) +{ + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + return 
std::make_unique( + client, + key, + settings->max_single_part_upload_size, + DBMS_DEFAULT_BUFFER_SIZE, + write_settings); +} + +void BackupWriterAzureBlobStorage::removeFile(const String & file_name) +{ + String key; + if (startsWith(file_name, ".")) + { + key= configuration.blob_path + file_name; + } + else + { + key = file_name; + } + StoredObject object(key); + object_storage->removeObjectIfExists(object); +} + +void BackupWriterAzureBlobStorage::removeFiles(const Strings & keys) +{ + StoredObjects objects; + for (const auto & key : keys) + objects.emplace_back(key); + + object_storage->removeObjectsIfExist(objects); + +} + +void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & keys) +{ + StoredObjects objects; + for (const auto & key : keys) + objects.emplace_back(key); + + object_storage->removeObjectsIfExist(objects); +} + +} + +#endif diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h new file mode 100644 index 00000000000..6ef66fc432d --- /dev/null +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -0,0 +1,69 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE +#include +#include +#include +#include + + +namespace DB +{ + +// using AzureClientPtr = std::shared_ptr; + +/// Represents a backup stored to Azure + class BackupReaderAzureBlobStorage : public BackupReaderDefault + { + public: + BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + ~BackupReaderAzureBlobStorage() override; + + bool fileExists(const String & file_name) override; + UInt64 getFileSize(const String & file_name) override; + std::unique_ptr readFile(const String & file_name) override; + + void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup, + DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override; + + private: + const DataSourceDescription data_source_description; + std::shared_ptr client; + StorageAzureBlob::Configuration configuration; + std::unique_ptr object_storage; + std::shared_ptr settings; + }; + + + class BackupWriterAzureBlobStorage : public BackupWriterDefault + { + public: + BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + ~BackupWriterAzureBlobStorage() override; + + bool fileExists(const String & file_name) override; + UInt64 getFileSize(const String & file_name) override; + std::unique_ptr writeFile(const String & file_name) override; + + void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override; + void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, + bool copy_encrypted, UInt64 start_pos, UInt64 length) override; + + void removeFile(const String & file_name) override; + void removeFiles(const Strings & file_names) override; + + private: + std::unique_ptr readFile(const String & file_name, size_t expected_file_size) override; + void removeFilesBatch(const Strings & file_names); + const DataSourceDescription data_source_description; + std::shared_ptr client; + StorageAzureBlob::Configuration configuration; + std::unique_ptr object_storage; + std::shared_ptr settings; + }; + +} + +#endif diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp 
index bb97335d8fb..9363ca5e7a7 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -492,6 +492,7 @@ void BackupImpl::checkBackupDoesntExist() const else file_name_to_check_existence = ".backup"; + LOG_INFO(&Poco::Logger::get("BackupImpl"), "checkBackupDoesntExist 1"); if (writer->fileExists(file_name_to_check_existence)) throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name_for_logging); @@ -499,6 +500,7 @@ void BackupImpl::checkBackupDoesntExist() const if (!is_internal_backup) { assert(!lock_file_name.empty()); + LOG_INFO(&Poco::Logger::get("BackupImpl"), "checkBackupDoesntExist 2"); if (writer->fileExists(lock_file_name)) throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} is being written already", backup_name_for_logging); } @@ -522,6 +524,8 @@ bool BackupImpl::checkLockFile(bool throw_if_failed) const if (throw_if_failed) { + LOG_INFO(&Poco::Logger::get("BackupImpl"), "checkLockFile"); + if (!writer->fileExists(lock_file_name)) { throw Exception( @@ -886,12 +890,12 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry) } else if (src_disk && from_immutable_file) { - LOG_TRACE(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index); + LOG_INFO(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index); writer->copyFileFromDisk(info.data_file_name, src_disk, src_file_path, info.encrypted_by_disk, info.base_size, info.size - info.base_size); } else { - LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index); + LOG_INFO(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index); auto create_read_buffer = [entry, read_settings = writer->getReadSettings()] { return entry->getReadBuffer(read_settings); }; writer->copyDataToFile(info.data_file_name, create_read_buffer, info.base_size, info.size - info.base_size); } diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp new file mode 100644 index 00000000000..6f7b5f38c28 --- /dev/null +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -0,0 +1,134 @@ +#include "config.h" + +#include +#include + +#if USE_AZURE_BLOB_STORAGE +#include +#include +#include +#include +#include +#include +#include +#endif + + +namespace DB +{ +namespace fs = std::filesystem; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int SUPPORT_IS_DISABLED; +} + +#if USE_AZURE_BLOB_STORAGE +namespace +{ + String removeFileNameFromURL(String & url) + { + Poco::URI url2{url}; + String path = url2.getPath(); + size_t slash_pos = path.find_last_of('/'); + String file_name = path.substr(slash_pos + 1); + path.resize(slash_pos + 1); + url2.setPath(path); + url = url2.toString(); + return file_name; + } +} +#endif + + +void registerBackupEngineAzureBlobStorage(BackupFactory & factory) +{ + auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr + { +#if USE_AZURE_BLOB_STORAGE + const String & id_arg = params.backup_info.id_arg; + const auto & args = params.backup_info.args; + + LOG_INFO(&Poco::Logger::get("registerBackupEngineAzureBlobStorage"), "Begin id_arg={} args.size={}", id_arg, args.size()); + + StorageAzureBlob::Configuration 
configuration; + + if (args.size() == 4) + { + configuration.connection_url = args[0].safeGet(); + configuration.is_connection_string = true; + + configuration.container = args[1].safeGet(); + configuration.blob_path = args[2].safeGet(); + configuration.format = args[3].safeGet(); + + LOG_TRACE(&Poco::Logger::get("registerBackupEngineAzureBlobStorage"), "configuration.connection_url = {}" + "configuration.container = {}" + "configuration.blob_path = {}" + "configuration.format = {}", + configuration.connection_url, configuration.container, configuration.blob_path, configuration.format); + } + + + BackupImpl::ArchiveParams archive_params; + if (hasRegisteredArchiveFileExtension(configuration.blob_path)) + { + if (params.is_internal_backup) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); + + archive_params.archive_name = removeFileNameFromURL(configuration.blob_path); + archive_params.compression_method = params.compression_method; + archive_params.compression_level = params.compression_level; + archive_params.password = params.password; + } + else + { + if (!params.password.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted"); + } + + + if (params.open_mode == IBackup::OpenMode::READ) + { + auto reader = std::make_shared(configuration, + params.read_settings, + params.write_settings, + params.context); + + return std::make_unique( + params.backup_info, + archive_params, + params.base_backup_info, + reader, + params.context, + /*params.use_same_s3_credentials_for_base_backup*/ false); + } + else + { + auto writer = std::make_shared(configuration, + params.read_settings, + params.write_settings, + params.context); + + return std::make_unique( + params.backup_info, + archive_params, + params.base_backup_info, + writer, + params.context, + params.is_internal_backup, + params.backup_coordination, + params.backup_uuid, + params.deduplicate_files, + /*params.use_same_s3_credentials_for_base_backup*/ false); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "AzureBlobStorage support is disabled"); +#endif + }; + + factory.registerBackupEngine("AzureBlobStorage", creator_fn); +} + +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0257b7d329b..984594a6541 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -87,6 +87,7 @@ add_headers_and_sources(clickhouse_common_io IO) add_headers_and_sources(clickhouse_common_io IO/Archives) add_headers_and_sources(clickhouse_common_io IO/Resource) add_headers_and_sources(clickhouse_common_io IO/S3) +add_headers_and_sources(clickhouse_common_io IO/AzureBlobStorage) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) @@ -139,6 +140,7 @@ endif() if (TARGET ch_contrib::azure_sdk) add_headers_and_sources(dbms Disks/ObjectStorages/AzureBlobStorage) + add_headers_and_sources(dbms IO/AzureBlobStorage) endif() if (TARGET ch_contrib::hdfs) @@ -485,6 +487,7 @@ if (TARGET ch_contrib::aws_s3) endif() if (TARGET ch_contrib::azure_sdk) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::azure_sdk) dbms_target_link_libraries (PRIVATE ch_contrib::azure_sdk) endif() diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 58e860ebcaf..1655d19986a 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -361,6 +361,10 @@ The server successfully detected this situation and will download merged part fr M(S3PutObject, "Number of S3 API PutObject 
calls.") \ M(S3GetObject, "Number of S3 API GetObject calls.") \ \ + M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ + M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ + M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ \ diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp new file mode 100644 index 00000000000..bf0bcac664b --- /dev/null +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -0,0 +1,324 @@ +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event AzureCopyObject; + extern const Event AzureUploadPart; + + extern const Event DiskAzureCopyObject; + extern const Event DiskAzureUploadPart; +} + + +namespace DB +{ + +size_t max_single_operation_copy_size = 256 * 1024 * 1024; + + +namespace +{ + class UploadHelper + { + public: + UploadHelper( + const CreateReadBuffer & create_read_buffer_, + std::shared_ptr client_, + size_t offset_, + size_t total_size_, + const String & dest_bucket_, + const String & dest_key_, + std::shared_ptr settings_, + const std::optional> & object_metadata_, + ThreadPoolCallbackRunner schedule_, + bool for_disk_azure_blob_storage_) + : create_read_buffer(create_read_buffer_) + , client(client_) + , offset (offset_) + , total_size (total_size_) + , dest_bucket(dest_bucket_) + , dest_key(dest_key_) + , settings(settings_) + , object_metadata(object_metadata_) + , schedule(schedule_) + , for_disk_azure_blob_storage(for_disk_azure_blob_storage_) + , log(&Poco::Logger::get("azureBlobStorageUploadHelper")) + , max_single_part_upload_size(settings_.get()->max_single_part_upload_size) + { + } + + ~UploadHelper() {} + + protected: + std::function()> create_read_buffer; + std::shared_ptr client; + size_t offset; + size_t total_size; + const String & dest_bucket; + const String & dest_key; + std::shared_ptr settings; + const std::optional> & object_metadata; + ThreadPoolCallbackRunner schedule; + bool for_disk_azure_blob_storage; + const Poco::Logger * log; + size_t max_single_part_upload_size; + + struct UploadPartTask + { + char *data = nullptr; + size_t size = 0; + std::string block_id; + bool is_finished = false; + std::exception_ptr exception; + + ~UploadPartTask() + { + if (data != nullptr) + free(data); + } + }; + + size_t normal_part_size; + std::vector block_ids; + + std::list TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks; + int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::mutex bg_tasks_mutex; + std::condition_variable bg_tasks_condvar; + + public: + void performCopy() + { + performMultipartUpload(); + } + + void completeMultipartUpload() + { + auto block_blob_client = client->GetBlockBlobClient(dest_key); + block_blob_client.CommitBlockList(block_ids); + } + + void performMultipartUpload() + { + normal_part_size = 1024; + + size_t position = offset; + size_t end_position = offset + total_size; + + try + { + while (position < end_position) + { + size_t next_position = std::min(position + normal_part_size, end_position); + size_t part_size = 
next_position - position; /// `part_size` is either `normal_part_size` or smaller if it's the final part. + + uploadPart(position, part_size); + + position = next_position; + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + waitForAllBackgroundTasks(); + throw; + } + + waitForAllBackgroundTasks(); + completeMultipartUpload(); + } + + + void uploadPart(size_t part_offset, size_t part_size) + { + LOG_TRACE(log, "Writing part. Bucket: {}, Key: {}, Size: {}", dest_bucket, dest_key, part_size); + + if (!part_size) + { + LOG_TRACE(log, "Skipping writing an empty part."); + return; + } + + if (schedule) + { + UploadPartTask * task = nullptr; + + { + std::lock_guard lock(bg_tasks_mutex); + task = &bg_tasks.emplace_back(); + ++num_added_bg_tasks; + } + + /// Notify waiting thread when task finished + auto task_finish_notify = [this, task]() + { + std::lock_guard lock(bg_tasks_mutex); + task->is_finished = true; + ++num_finished_bg_tasks; + + /// Notification under mutex is important here. + /// Otherwise, WriteBuffer could be destroyed in between + /// Releasing lock and condvar notification. + bg_tasks_condvar.notify_one(); + }; + + try + { + auto read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); + auto buffer = std::make_unique(std::move(read_buffer), part_size); + task->data = new char[part_size]; + task->size = part_size; + buffer->read(task->data,part_size); + task->block_id = getRandomASCIIString(64); + + schedule([this, task, task_finish_notify]() + { + try + { + processUploadTask(*task); + } + catch (...) + { + task->exception = std::current_exception(); + } + task_finish_notify(); + }, Priority{}); + } + catch (...) + { + task_finish_notify(); + throw; + } + } + else + { + UploadPartTask task; + auto read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); + auto buffer = std::make_unique(std::move(read_buffer), part_size); + task.data = new char[part_size]; + buffer->read(task.data,part_size); + task.size = part_size; + processUploadTask(task); + block_ids.emplace_back(task.block_id); + } + } + + void processUploadTask(UploadPartTask & task) + { + auto block_id = processUploadPartRequest(task); + + std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race + task.block_id = block_id; + LOG_TRACE(log, "Writing part finished. 
Bucket: {}, Key: {}, block_id: {}, Parts: {}", dest_bucket, dest_key, block_id, bg_tasks.size()); + } + + String processUploadPartRequest(UploadPartTask & task) + { + ProfileEvents::increment(ProfileEvents::AzureUploadPart); + if (for_disk_azure_blob_storage) + ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); + + auto block_blob_client = client->GetBlockBlobClient(dest_key); + task.block_id = getRandomASCIIString(64); + Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast(task.data), task.size); + block_blob_client.StageBlock(task.block_id, memory); + + return task.block_id; + } + + + void waitForAllBackgroundTasks() + { + if (!schedule) + return; + + std::unique_lock lock(bg_tasks_mutex); + /// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock + bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); + + auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks); + for (auto & task : tasks) + { + if (task.exception) + std::rethrow_exception(task.exception); + block_ids.emplace_back(task.block_id); + } + } + }; +} + + +void copyDataToAzureBlobStorageFile( + const std::function()> & create_read_buffer, + size_t offset, + size_t size, + std::shared_ptr & dest_client, + const String & dest_bucket, + const String & dest_key, + std::shared_ptr settings, + const std::optional> & object_metadata, + ThreadPoolCallbackRunner schedule, + bool for_disk_azure_blob_storage) +{ + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage}; + helper.performCopy(); +} + + +void copyAzureBlobStorageFile( + std::shared_ptr src_client, + std::shared_ptr dest_client, + const String & src_bucket, + const String & src_key, + size_t offset, + size_t size, + const String & dest_bucket, + const String & dest_key, + std::shared_ptr settings, + const ReadSettings & read_settings, + const std::optional> & object_metadata, + ThreadPoolCallbackRunner schedule, + bool for_disk_azure_blob_storage) +{ + + if (size < max_single_operation_copy_size) + { + ProfileEvents::increment(ProfileEvents::AzureCopyObject); + if (for_disk_azure_blob_storage) + ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); + auto block_blob_client_src = src_client->GetBlockBlobClient(src_key); + auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_key); + auto uri = block_blob_client_src.GetUrl(); + block_blob_client_dest.CopyFromUri(uri); + } + else + { + LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Bucket: {}, Key: {}", src_bucket, src_key); + auto create_read_buffer = [&] + { + return std::make_unique(src_client, src_key, read_settings, settings->max_single_read_retries, + settings->max_single_download_retries); + }; + + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage}; + helper.performCopy(); + } +} + +} + +#endif diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h new file mode 100644 index 00000000000..31228fbcb23 --- /dev/null +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -0,0 +1,58 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ 
+class SeekableReadBuffer; + +using CreateReadBuffer = std::function()>; + +/// Copies a file from AzureBlobStorage to AzureBlobStorage. +/// The parameters `src_offset` and `src_size` specify a part in the source to copy. +void copyAzureBlobStorageFile( + std::shared_ptr src_client, + std::shared_ptr dest_client, + const String & src_bucket, + const String & src_key, + size_t src_offset, + size_t src_size, + const String & dest_bucket, + const String & dest_key, + std::shared_ptr settings, + const ReadSettings & read_settings, + const std::optional> & object_metadata = std::nullopt, + ThreadPoolCallbackRunner schedule_ = {}, + bool for_disk_azure_blob_storage = false); + + +/// Copies data from any seekable source to AzureBlobStorage. +/// The same functionality can be achieved by using the function copyData() and the class WriteBufferFromAzureBlobStorage, +/// however copyDataToAzureBlobStorageFile() is faster and uses less memory. +/// The callback `create_read_buffer` can be called from multiple threads in parallel, so it should be thread-safe. +/// The parameters `offset` and `size` specify a part in the source to copy. +void copyDataToAzureBlobStorageFile( + const std::function()> & create_read_buffer, + size_t offset, + size_t size, + std::shared_ptr & client, + const String & dest_bucket, + const String & dest_key, + std::shared_ptr settings, + const std::optional> & object_metadata = std::nullopt, + ThreadPoolCallbackRunner schedule_ = {}, + bool for_disk_azure_blob_storage = false); + +} + +#endif diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 2e0703a8df3..e36604cfb1a 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -258,6 +258,17 @@ AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(ContextPtr loca return settings_ptr; } +std::shared_ptr StorageAzureBlob::createSettingsAsSharedPtr(ContextPtr local_context) +{ + const auto & context_settings = local_context->getSettingsRef(); + auto settings_ptr = std::make_shared(); + settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); + + return settings_ptr; +} + void registerStorageAzureBlob(StorageFactory & factory) { factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index b97dee0caed..570e4124d73 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -80,6 +80,7 @@ public: static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); + static std::shared_ptr createSettingsAsSharedPtr(ContextPtr local_context); static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); diff --git a/tests/integration/test_backup_restore_azure_blob_storage/__init__.py b/tests/integration/test_backup_restore_azure_blob_storage/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_backup_restore_azure_blob_storage/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml
b/tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml new file mode 100644 index 00000000000..5725dce40cd --- /dev/null +++ b/tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml @@ -0,0 +1,11 @@ + + 1 + 0 + 0.0 + 0 + 1 + 1 + 0 + 16 + 16 + \ No newline at end of file diff --git a/tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml b/tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml new file mode 100644 index 00000000000..b74bb1502ce --- /dev/null +++ b/tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml @@ -0,0 +1,13 @@ + + + + + 0 + 0 + 0 + 1000 + 1 + 1 + + + diff --git a/tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml b/tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml new file mode 100644 index 00000000000..c12eb2f79f4 --- /dev/null +++ b/tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml @@ -0,0 +1,8 @@ + + + + + default + + + diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py new file mode 100644 index 00000000000..2ecf08a4f40 --- /dev/null +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 + +import gzip +import json +import logging +import os +import io +import random +import threading +import time + +from azure.storage.blob import BlobServiceClient +import helpers.client +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.network import PartitionManager +from helpers.mock_servers import start_mock_servers +from helpers.test_tools import exec_query_with_retry + + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["configs/config.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def azure_query( + node, query, expect_error="false", try_num=10, settings={}, query_on_retry=None +): + for i in range(try_num): + try: + if expect_error == "true": + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) + except Exception as ex: + retriable_errors = [ + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Error while polling for socket ready read", + ] + retry = False + for error in retriable_errors: + if error in str(ex): + retry = 
True + print(f"Try num: {i}. Having retriable error: {ex}") + time.sleep(i) + break + if not retry or i == try_num - 1: + raise Exception(ex) + if query_on_retry is not None: + node.query(query_on_retry) + continue + + +def get_azure_file_content(filename, port): + container_name = "cont" + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + blob_service_client = BlobServiceClient.from_connection_string( + str(connection_string) + ) + container_client = blob_service_client.get_container_client(container_name) + blob_client = container_client.get_blob_client(filename) + download_stream = blob_client.download_blob() + return download_stream.readall().decode("utf-8") + + +def put_azure_file_content(filename, port, data): + container_name = "cont" + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + try: + container_client = blob_service_client.create_container(container_name) + except: + container_client = blob_service_client.get_container_client(container_name) + + blob_client = container_client.get_blob_client(filename) + buf = io.BytesIO(data) + blob_client.upload_blob(buf) + +@pytest.fixture(autouse=True, scope="function") +def delete_all_files(cluster): + port = cluster.env_variables["AZURITE_PORT"] + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + containers = blob_service_client.list_containers() + for container in containers: + container_client = blob_service_client.get_container_client(container) + blob_list = container_client.list_blobs() + for blob in blob_list: + print(blob) + blob_client = container_client.get_blob_client(blob) + blob_client.delete_blob() + + assert len(list(container_client.list_blobs())) == 0 + + yield + + +def test_create_table_connection_string(cluster): + node = cluster.instances["node"] + azure_query( + node, + f"CREATE TABLE test_create_table_conn_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_create_connection_string', 'CSV')", + ) + +def test_backup_restore(cluster): + node = cluster.instances["node"] + port = cluster.env_variables["AZURITE_PORT"] + azure_query( + node, + f"CREATE TABLE test_simple_write_connection_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c.csv', 'CSV')", + ) + azure_query(node, f"INSERT INTO test_simple_write_connection_string VALUES (1, 'a')") + print(get_azure_file_content("test_simple_write_c.csv", port)) + assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n' + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup.csv', 'CSV')" + azure_query(node,f"BACKUP TABLE 
test_simple_write_connection_string TO {backup_destination}") + print (get_azure_file_content("test_simple_write_c_backup.csv.backup", port)) + azure_query(node, f"RESTORE TABLE test_simple_write_connection_string AS test_simple_write_connection_string_restored FROM {backup_destination};") + assert(azure_query(node,f"SELECT * from test_simple_write_connection_string_restored") == "1\ta\n") \ No newline at end of file From 05b608cd76da8995086887f812e1ab3fceb99551 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 20 Nov 2023 10:12:45 +0000 Subject: [PATCH 016/884] Automatic style fix --- .../test.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 2ecf08a4f40..cda3cab07e4 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -18,7 +18,6 @@ from helpers.mock_servers import start_mock_servers from helpers.test_tools import exec_query_with_retry - @pytest.fixture(scope="module") def cluster(): try: @@ -103,6 +102,7 @@ def put_azure_file_content(filename, port, data): buf = io.BytesIO(data) blob_client.upload_blob(buf) + @pytest.fixture(autouse=True, scope="function") def delete_all_files(cluster): port = cluster.env_variables["AZURITE_PORT"] @@ -133,6 +133,7 @@ def test_create_table_connection_string(cluster): f"CREATE TABLE test_create_table_conn_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_create_connection_string', 'CSV')", ) + def test_backup_restore(cluster): node = cluster.instances["node"] port = cluster.env_variables["AZURITE_PORT"] @@ -140,12 +141,23 @@ def test_backup_restore(cluster): node, f"CREATE TABLE test_simple_write_connection_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c.csv', 'CSV')", ) - azure_query(node, f"INSERT INTO test_simple_write_connection_string VALUES (1, 'a')") + azure_query( + node, f"INSERT INTO test_simple_write_connection_string VALUES (1, 'a')" + ) print(get_azure_file_content("test_simple_write_c.csv", port)) assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n' backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup.csv', 'CSV')" - azure_query(node,f"BACKUP TABLE test_simple_write_connection_string TO {backup_destination}") - print (get_azure_file_content("test_simple_write_c_backup.csv.backup", port)) - azure_query(node, f"RESTORE TABLE test_simple_write_connection_string AS test_simple_write_connection_string_restored FROM {backup_destination};") - assert(azure_query(node,f"SELECT * from test_simple_write_connection_string_restored") == "1\ta\n") \ No newline at end of file + azure_query( + node, + f"BACKUP TABLE test_simple_write_connection_string TO {backup_destination}", + ) + print(get_azure_file_content("test_simple_write_c_backup.csv.backup", port)) + azure_query( + node, + f"RESTORE TABLE test_simple_write_connection_string AS test_simple_write_connection_string_restored FROM {backup_destination};", + ) + assert ( + azure_query(node, f"SELECT * from test_simple_write_connection_string_restored") + == "1\ta\n" + ) From 6dfb1c25ec6a4a61a4fe329191c10263eb19ad07 Mon Sep 17 00:00:00 2001 From: Smita 
Kulkarni Date: Mon, 20 Nov 2023 11:37:06 +0100 Subject: [PATCH 017/884] Added docs --- docs/en/operations/backup.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 6068b185ede..15d953249a0 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -451,3 +451,24 @@ To disallow concurrent backup/restore, you can use these settings respectively. The default value for both is true, so by default concurrent backup/restores are allowed. When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time. + +## Configuring BACKUP/RESTORE to use an AzureBlobStorage Endpoint + +To write backups to an AzureBlobStorage container, you need the following pieces of information: +- AzureBlobStorage endpoint connection string / url, +- Container, +- Path, +- Account name (if url is specified) +- Account Key (if url is specified) + +The destination for a backup is specified like this: +``` +AzureBlobStorage('<connection string>/<url>', '<container>', '<path>', '<account name>', '<account key>') +``` + +```sql +BACKUP TABLE data TO AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', + 'test_container', 'data_backup'); +RESTORE TABLE data AS data_restored FROM AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', + 'test_container', 'data_backup'); +``` From 96c4b6bc35ee818afd2d2963dec7afdb5583969c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 20 Nov 2023 14:41:14 +0100 Subject: [PATCH 018/884] Updated to not analyze create parameterized view for analyzer & old analyzer --- src/Interpreters/InterpreterCreateQuery.cpp | 48 +++++-------------- src/Storages/StorageView.cpp | 3 +- .../0_stateless/02428_parameterized_view.sh | 2 +- 3 files changed, 14 insertions(+), 39 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 747c0be009e..4ee666e2a9a 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -649,6 +649,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (!attach && !is_restore_from_backup && context_->getSettingsRef().flatten_nested) res.flattenNested(); + if (res.getAllPhysical().empty()) throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Cannot CREATE table without physical columns"); @@ -755,49 +756,22 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti { Block as_select_sample; - if (getContext()->getSettingsRef().allow_experimental_analyzer) + if (!create.isParameterizedView()) { - if (create.isParameterizedView()) - { - auto select = create.select->clone(); - - ///Get all query parameters - const auto parameters = analyzeReceiveQueryParamsWithType(select); - NameToNameMap parameter_values; - - for (const auto & parameter : parameters) - { - const auto data_type = DataTypeFactory::instance().get(parameter.second); - /// Todo improve getting default values & include more datatypes - if (data_type->isValueRepresentedByNumber() || parameter.second == "String") - parameter_values[parameter.first] = "1"; - else if (parameter.second.starts_with("Array") ||
parameter.second.starts_with("Map")) - parameter_values[parameter.first] = "[]"; - else - parameter_values[parameter.first] = " "; - } - - /// Replace with default parameters - ReplaceQueryParameterVisitor visitor(parameter_values); - visitor.visit(select); - - as_select_sample = InterpreterSelectQueryAnalyzer::getSampleBlock(select, getContext()); - } - else + if (getContext()->getSettingsRef().allow_experimental_analyzer) { as_select_sample = InterpreterSelectQueryAnalyzer::getSampleBlock(create.select->clone(), getContext()); } + else + { + as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), + getContext(), + false /* is_subquery */, + create.isParameterizedView()); + } + properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } - else - { - as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), - getContext(), - false /* is_subquery */, - create.isParameterizedView()); - } - - properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } else if (create.as_table_function) { diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index f0f9b9540de..2f7267e3701 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -112,7 +112,8 @@ StorageView::StorageView( : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(columns_); + if (is_parameterized_view_ && !query.isParameterizedView()) + storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); if (!query.select) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sh b/tests/queries/0_stateless/02428_parameterized_view.sh index ad9c672f4c5..499b8697ffc 100755 --- a/tests/queries/0_stateless/02428_parameterized_view.sh +++ b/tests/queries/0_stateless/02428_parameterized_view.sh @@ -37,7 +37,7 @@ $CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Ca $CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1(price=20)" $CLICKHOUSE_CLIENT -q "SELECT Price FROM \`test_02428_pv1\`(price=20)" -$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1" 2>&1 | grep -q "UNKNOWN_QUERY_PARAMETER\|UNKNOWN_IDENTIFIER" && echo 'ERROR' || echo 'OK' $CLICKHOUSE_CLIENT --param_p 10 -q "SELECT Price FROM test_02428_pv1(price={p:UInt64})" $CLICKHOUSE_CLIENT --param_l 1 -q "SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64})" From d0827e3ea77ff432c4a6a66145827428bcd62b5e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 1 Dec 2023 17:45:23 +0000 Subject: [PATCH 019/884] Add a test. 
--- .../0_stateless/02932_set_ttl_where.reference | 0 .../0_stateless/02932_set_ttl_where.sql | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/02932_set_ttl_where.reference create mode 100644 tests/queries/0_stateless/02932_set_ttl_where.sql diff --git a/tests/queries/0_stateless/02932_set_ttl_where.reference b/tests/queries/0_stateless/02932_set_ttl_where.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02932_set_ttl_where.sql b/tests/queries/0_stateless/02932_set_ttl_where.sql new file mode 100644 index 00000000000..85fddf613e8 --- /dev/null +++ b/tests/queries/0_stateless/02932_set_ttl_where.sql @@ -0,0 +1,22 @@ +create or replace table temp ( + a UInt32 +) +engine = MergeTree +order by a; + +insert into temp select number from system.numbers limit 100_000; + +create or replace table t_temp ( + a UInt32, + timestamp DateTime +) +engine = MergeTree +order by a +TTL timestamp + INTERVAL 2 SECOND WHERE a in (select a from temp); + +select sleep(1); +insert into t_temp select rand(), now() from system.numbers limit 1_000_000; +select sleep(1); +insert into t_temp select rand(), now() from system.numbers limit 1_000_000; +select sleep(1); +optimize table t_temp final; From 508046e6922c0cb163ce5611f1e6ef6a22f8b7f1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 1 Dec 2023 20:31:26 +0000 Subject: [PATCH 020/884] Attempt to support subqueries in TTL. --- src/Interpreters/PreparedSets.cpp | 8 ++- src/Interpreters/PreparedSets.h | 1 + src/Processors/TTL/ITTLAlgorithm.cpp | 5 +- src/Processors/TTL/ITTLAlgorithm.h | 9 ++- .../TTL/TTLAggregationAlgorithm.cpp | 11 ++-- src/Processors/TTL/TTLAggregationAlgorithm.h | 1 + src/Processors/TTL/TTLColumnAlgorithm.cpp | 5 +- src/Processors/TTL/TTLColumnAlgorithm.h | 1 + src/Processors/TTL/TTLDeleteAlgorithm.cpp | 10 +-- src/Processors/TTL/TTLDeleteAlgorithm.h | 2 +- src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp | 5 +- src/Processors/TTL/TTLUpdateInfoAlgorithm.h | 1 + src/Processors/Transforms/TTLTransform.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 19 +++--- src/Storages/StorageInMemoryMetadata.cpp | 21 +++---- src/Storages/TTLDescription.cpp | 62 ++++++++++++------- src/Storages/TTLDescription.h | 15 ++++- 17 files changed, 116 insertions(+), 62 deletions(-) diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 955d8892284..ea8d9a62b8b 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -189,11 +189,17 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) } } + set_and_key->set->fillSetElements(); + + return buildSetInplace(context); +} + +SetPtr FutureSetFromSubquery::buildSetInplace(const ContextPtr & context) +{ auto plan = build(context); if (!plan) return nullptr; - set_and_key->set->fillSetElements(); auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); pipeline.complete(std::make_shared(Block())); diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index e237789c63c..3e751d309ba 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -107,6 +107,7 @@ public: SetPtr get() const override; DataTypes getTypes() const override; SetPtr buildOrderedSetInplace(const ContextPtr & context) override; + SetPtr 
buildSetInplace(const ContextPtr & context); std::unique_ptr build(const ContextPtr & context); diff --git a/src/Processors/TTL/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp index 79140137df8..af6c4e4ac35 100644 --- a/src/Processors/TTL/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -11,8 +11,9 @@ namespace ErrorCodes } ITTLAlgorithm::ITTLAlgorithm( - const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) - : description(description_) + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + : ttl_expressions(ttl_expressions_) + , description(description_) , old_ttl_info(old_ttl_info_) , current_time(current_time_) , force(force_) diff --git a/src/Processors/TTL/ITTLAlgorithm.h b/src/Processors/TTL/ITTLAlgorithm.h index 49cd2c46d9d..6e73286b564 100644 --- a/src/Processors/TTL/ITTLAlgorithm.h +++ b/src/Processors/TTL/ITTLAlgorithm.h @@ -8,6 +8,12 @@ namespace DB { +struct TTlExpressions +{ + ExpressionActionsPtr expression; + ExpressionActionsPtr where_expression; +}; + /** * Represents the actions, which are required to do * with data, when TTL is expired: delete, aggregate, etc. @@ -18,7 +24,7 @@ public: using TTLInfo = IMergeTreeDataPart::TTLInfo; using MutableDataPartPtr = MergeTreeMutableDataPartPtr; - ITTLAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + ITTLAlgorithm(const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); virtual ~ITTLAlgorithm() = default; virtual void execute(Block & block) = 0; @@ -39,6 +45,7 @@ protected: bool isTTLExpired(time_t ttl) const; UInt32 getTimestampByIndex(const IColumn * column, size_t index) const; + const TTlExpressions ttl_expressions; const TTLDescription description; const TTLInfo old_ttl_info; const time_t current_time; diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp index fa3436ec55d..ab2ba5f58fc 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -5,13 +5,14 @@ namespace DB { TTLAggregationAlgorithm::TTLAggregationAlgorithm( + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_, const Block & header_, const MergeTreeData & storage_) - : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) + : ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_) , header(header_) { current_key_value.resize(description.group_by_keys.size()); @@ -73,8 +74,8 @@ void TTLAggregationAlgorithm::execute(Block & block) const auto & column_names = header.getNames(); MutableColumns aggregate_columns = header.cloneEmptyColumns(); - auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); - auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column); + auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column); + auto where_column = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column); size_t rows_aggregated = 0; size_t current_key_start = 0; @@ -145,8 +146,8 @@ void 
TTLAggregationAlgorithm::execute(Block & block) /// If some rows were aggregated we have to recalculate ttl info's if (some_rows_were_aggregated) { - auto ttl_column_after_aggregation = executeExpressionAndGetColumn(description.expression, block, description.result_column); - auto where_column_after_aggregation = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column); + auto ttl_column_after_aggregation = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column); + auto where_column_after_aggregation = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column); for (size_t i = 0; i < block.rows(); ++i) { bool where_filter_passed = !where_column_after_aggregation || where_column_after_aggregation->getBool(i); diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.h b/src/Processors/TTL/TTLAggregationAlgorithm.h index 0e4bf092ed6..9fd074efba8 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.h +++ b/src/Processors/TTL/TTLAggregationAlgorithm.h @@ -13,6 +13,7 @@ class TTLAggregationAlgorithm final : public ITTLAlgorithm { public: TTLAggregationAlgorithm( + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, diff --git a/src/Processors/TTL/TTLColumnAlgorithm.cpp b/src/Processors/TTL/TTLColumnAlgorithm.cpp index 04c4d7b9348..cb99dcf99b1 100644 --- a/src/Processors/TTL/TTLColumnAlgorithm.cpp +++ b/src/Processors/TTL/TTLColumnAlgorithm.cpp @@ -4,6 +4,7 @@ namespace DB { TTLColumnAlgorithm::TTLColumnAlgorithm( + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, @@ -12,7 +13,7 @@ TTLColumnAlgorithm::TTLColumnAlgorithm( const ExpressionActionsPtr & default_expression_, const String & default_column_name_, bool is_compact_part_) - : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) + : ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_) , column_name(column_name_) , default_expression(default_expression_) , default_column_name(default_column_name_) @@ -49,7 +50,7 @@ void TTLColumnAlgorithm::execute(Block & block) if (default_column) default_column = default_column->convertToFullColumnIfConst(); - auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); + auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column); auto & column_with_type = block.getByName(column_name); const IColumn * values_column = column_with_type.column.get(); diff --git a/src/Processors/TTL/TTLColumnAlgorithm.h b/src/Processors/TTL/TTLColumnAlgorithm.h index 30de77dcc2a..efcd7c74454 100644 --- a/src/Processors/TTL/TTLColumnAlgorithm.h +++ b/src/Processors/TTL/TTLColumnAlgorithm.h @@ -11,6 +11,7 @@ class TTLColumnAlgorithm final : public ITTLAlgorithm { public: TTLColumnAlgorithm( + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, diff --git a/src/Processors/TTL/TTLDeleteAlgorithm.cpp b/src/Processors/TTL/TTLDeleteAlgorithm.cpp index f176df2d003..6a172e9c3c3 100644 --- a/src/Processors/TTL/TTLDeleteAlgorithm.cpp +++ b/src/Processors/TTL/TTLDeleteAlgorithm.cpp @@ -4,8 +4,8 @@ namespace DB { TTLDeleteAlgorithm::TTLDeleteAlgorithm( - const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, 
bool force_) - : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + : ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_) { if (!isMinTTLExpired()) new_ttl_info = old_ttl_info; @@ -19,8 +19,8 @@ void TTLDeleteAlgorithm::execute(Block & block) if (!block || !isMinTTLExpired()) return; - auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); - auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column); + auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column); + auto where_column = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column); MutableColumns result_columns; const auto & column_names = block.getNames(); @@ -54,7 +54,7 @@ void TTLDeleteAlgorithm::execute(Block & block) void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const { - if (description.where_expression) + if (ttl_expressions.where_expression) data_part->ttl_infos.rows_where_ttl[description.result_column] = new_ttl_info; else data_part->ttl_infos.table_ttl = new_ttl_info; diff --git a/src/Processors/TTL/TTLDeleteAlgorithm.h b/src/Processors/TTL/TTLDeleteAlgorithm.h index 292a29bfa27..23389070774 100644 --- a/src/Processors/TTL/TTLDeleteAlgorithm.h +++ b/src/Processors/TTL/TTLDeleteAlgorithm.h @@ -10,7 +10,7 @@ namespace DB class TTLDeleteAlgorithm final : public ITTLAlgorithm { public: - TTLDeleteAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + TTLDeleteAlgorithm(const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); void execute(Block & block) override; void finalize(const MutableDataPartPtr & data_part) const override; diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp index eba364aa2b8..34c0cad70ea 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp @@ -4,13 +4,14 @@ namespace DB { TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm( + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLUpdateField ttl_update_field_, const String ttl_update_key_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) - : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) + : ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_) , ttl_update_field(ttl_update_field_) , ttl_update_key(ttl_update_key_) { @@ -21,7 +22,7 @@ void TTLUpdateInfoAlgorithm::execute(Block & block) if (!block) return; - auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); + auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column); for (size_t i = 0; i < block.rows(); ++i) { UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i); diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h index 45eecbde3d0..e9bcfcdec88 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h @@ 
-20,6 +20,7 @@ class TTLUpdateInfoAlgorithm : public ITTLAlgorithm { public: TTLUpdateInfoAlgorithm( + const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLUpdateField ttl_update_field_, const String ttl_update_key_, diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index 7cde86098c7..d3d45f68d46 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -36,7 +36,7 @@ TTLTransform::TTLTransform( rows_ttl, old_ttl_infos.table_ttl, current_time_, force_); /// Skip all data if table ttl is expired for part - if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression) + if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression_ast) all_data_dropped = true; delete_algorithm = algorithm.get(); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2a381afa805..d080240b066 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -125,13 +125,18 @@ void buildScatterSelector( /// Computes ttls and updates ttl infos void updateTTL( + const ContextPtr context, const TTLDescription & ttl_entry, IMergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, const Block & block, bool update_part_min_max_ttls) { - auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(ttl_entry.expression, block, ttl_entry.result_column); + auto expr_and_set = ttl_entry.buildExpression(); + for (auto & subquery : expr_and_set.sets->getSubqueries()) + subquery->buildSetInplace(context); + + auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(expr_and_set.expression, block, ttl_entry.result_column); if (const ColumnUInt16 * column_date = typeid_cast(ttl_column.get())) { @@ -488,7 +493,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( DB::IMergeTreeDataPart::TTLInfos move_ttl_infos; const auto & move_ttl_entries = metadata_snapshot->getMoveTTLs(); for (const auto & ttl_entry : move_ttl_entries) - updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); + updateTTL(context, ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); ReservationPtr reservation = data.reserveSpacePreferringTTLRules(metadata_snapshot, expected_size, move_ttl_infos, time(nullptr), 0, true); VolumePtr volume = data.getStoragePolicy()->getVolume(0); @@ -543,20 +548,20 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( } if (metadata_snapshot->hasRowsTTL()) - updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); + updateTTL(context, metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs()) - updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true); + updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true); for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTLs()) - updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true); + updateTTL(context, ttl_entry, new_data_part->ttl_infos, 
new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true); for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) - updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); + updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); for (const auto & ttl_entry : recompression_ttl_entries) - updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false); + updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false); new_data_part->ttl_infos.update(move_ttl_infos); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index af285a953dc..7db5af82e0b 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -193,7 +193,7 @@ TTLDescription StorageInMemoryMetadata::getRowsTTL() const bool StorageInMemoryMetadata::hasRowsTTL() const { - return table_ttl.rows_ttl.expression != nullptr; + return table_ttl.rows_ttl.expression_ast != nullptr; } TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTLs() const @@ -251,9 +251,8 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( NameSet required_ttl_columns; NameSet updated_ttl_columns; - auto add_dependent_columns = [&updated_columns](const auto & expression, auto & to_set) + auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set) { - auto required_columns = expression->getRequiredColumns(); for (const auto & dependency : required_columns) { if (updated_columns.contains(dependency)) @@ -269,13 +268,13 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( for (const auto & index : getSecondaryIndices()) { if (has_dependency(index.name, ColumnDependency::SKIP_INDEX)) - add_dependent_columns(index.expression, indices_columns); + add_dependent_columns(index.expression->getRequiredColumns(), indices_columns); } for (const auto & projection : getProjections()) { if (has_dependency(projection.name, ColumnDependency::PROJECTION)) - add_dependent_columns(&projection, projections_columns); + add_dependent_columns(projection.getRequiredColumns(), projections_columns); } auto add_for_rows_ttl = [&](const auto & expression, auto & to_set) @@ -289,25 +288,25 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( }; if (hasRowsTTL()) - add_for_rows_ttl(getRowsTTL().expression, required_ttl_columns); + add_for_rows_ttl(getRowsTTL().expression_columns, required_ttl_columns); for (const auto & entry : getRowsWhereTTLs()) - add_for_rows_ttl(entry.expression, required_ttl_columns); + add_for_rows_ttl(entry.expression_columns, required_ttl_columns); for (const auto & entry : getGroupByTTLs()) - add_for_rows_ttl(entry.expression, required_ttl_columns); + add_for_rows_ttl(entry.expression_columns, required_ttl_columns); for (const auto & entry : getRecompressionTTLs()) - add_dependent_columns(entry.expression, required_ttl_columns); + add_dependent_columns(entry.expression_columns, required_ttl_columns); for (const auto & [name, entry] : getColumnTTLs()) { - if (add_dependent_columns(entry.expression, required_ttl_columns) && include_ttl_target) + if (add_dependent_columns(entry.expression_columns, required_ttl_columns) && 
include_ttl_target) updated_ttl_columns.insert(name); } for (const auto & entry : getMoveTTLs()) - add_dependent_columns(entry.expression, required_ttl_columns); + add_dependent_columns(entry.expression_columns, required_ttl_columns); //TODO what about rows_where_ttl and group_by_ttl ?? diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index f601fed06ac..47138f30e4f 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -113,11 +113,11 @@ TTLDescription::TTLDescription(const TTLDescription & other) , if_exists(other.if_exists) , recompression_codec(other.recompression_codec) { - if (other.expression) - expression = other.expression->clone(); + // if (other.expression) + // expression = other.expression->clone(); - if (other.where_expression) - where_expression = other.where_expression->clone(); + // if (other.where_expression) + // where_expression = other.where_expression->clone(); } TTLDescription & TTLDescription::operator=(const TTLDescription & other) @@ -131,16 +131,16 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) else expression_ast.reset(); - if (other.expression) - expression = other.expression->clone(); - else - expression.reset(); + // if (other.expression) + // expression = other.expression->clone(); + // else + // expression.reset(); result_column = other.result_column; - if (other.where_expression) - where_expression = other.where_expression->clone(); - else - where_expression.reset(); + // if (other.where_expression) + // where_expression = other.where_expression->clone(); + // else + // where_expression.reset(); where_result_column = other.where_result_column; group_by_keys = other.group_by_keys; @@ -158,6 +158,17 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) return * this; } +static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndTypesList & columns, const ContextPtr & context) +{ + ExpressionAndSets result; + auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, columns); + ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context); + result.expression = analyzer.getActions(false); + result.sets = analyzer.getPreparedSets(); + + return result; +} + TTLDescription TTLDescription::getTTLFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, @@ -174,10 +185,15 @@ TTLDescription TTLDescription::getTTLFromAST( result.expression_ast = definition_ast->clone(); auto ttl_ast = result.expression_ast->clone(); - auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical()); - result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false); + auto expression = buildExpressionAndSets(ttl_ast, columns.getAllPhysical(), context).expression; + result.expression_columns = expression->getRequiredColumns(); + + // auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical()); + // result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false); result.result_column = ttl_ast->getColumnName(); + ExpressionActionsPtr where_expression; + if (ttl_element == nullptr) /// columns TTL { result.destination_type = DataDestinationType::DELETE; @@ -194,8 +210,10 @@ TTLDescription TTLDescription::getTTLFromAST( { if (ASTPtr where_expr_ast = ttl_element->where()) { - auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical()); - result.where_expression = 
ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false); + result.where_expression_ast = where_expr_ast->clone(); + where_expression = buildExpressionAndSets(where_expr_ast, columns.getAllPhysical(), context).expression; + // auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical()); + // result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false); result.where_result_column = where_expr_ast->getColumnName(); } } @@ -221,17 +239,17 @@ TTLDescription TTLDescription::getTTLFromAST( for (const auto & ast : ttl_element->group_by_assignments) { const auto assignment = ast->as(); - auto expression = assignment.expression(); + auto ass_expression = assignment.expression(); FindAggregateFunctionVisitor::Data data{false}; - FindAggregateFunctionVisitor(data).visit(expression); + FindAggregateFunctionVisitor(data).visit(ass_expression); if (!data.has_aggregate_function) throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "Invalid expression for assignment of column {}. Should contain an aggregate function", assignment.column_name); - expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName()); - aggregations.emplace_back(assignment.column_name, std::move(expression)); + ass_expression = addTypeConversionToAST(std::move(ass_expression), columns.getPhysical(assignment.column_name).type->getName()); + aggregations.emplace_back(assignment.column_name, std::move(ass_expression)); aggregation_columns_set.insert(assignment.column_name); } @@ -289,7 +307,7 @@ TTLDescription TTLDescription::getTTLFromAST( } } - checkTTLExpression(result.expression, result.result_column); + checkTTLExpression(expression, result.result_column); return result; } @@ -341,7 +359,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key); if (ttl.mode == TTLMode::DELETE) { - if (!ttl.where_expression) + if (!ttl.where_expression_ast) { if (have_unconditional_delete_ttl) throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "More than one DELETE TTL expression without WHERE expression is not allowed"); diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 8f60eb604b5..5ea243424cb 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -33,6 +33,15 @@ struct TTLAggregateDescription using TTLAggregateDescriptions = std::vector; +class PreparedSets; +using PreparedSetsPtr = std::shared_ptr; + +struct ExpressionAndSets +{ + ExpressionActionsPtr expression; + PreparedSetsPtr sets; +}; + /// Common struct for TTL record in storage struct TTLDescription { @@ -42,9 +51,10 @@ struct TTLDescription /// TTL d + INTERVAL 1 DAY /// ^~~~~~~~~~~~~~~~~~~^ ASTPtr expression_ast; + Names expression_columns; /// Expression actions evaluated from AST - ExpressionActionsPtr expression; + ExpressionAndSets buildExpression() const; /// Result column of this TTL expression String result_column; @@ -52,7 +62,8 @@ struct TTLDescription /// WHERE part in TTL expression /// TTL ... 
WHERE x % 10 == 0 and y > 5 /// ^~~~~~~~~~~~~~~~~~~~~~^ - ExpressionActionsPtr where_expression; + ASTPtr where_expression_ast; + ExpressionAndSets buildWhereExpression() const; /// Name of result column from WHERE expression String where_result_column; From 7ab4af06df0d78e6728e3cc5c727e5c9e4cc33ef Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 4 Dec 2023 18:04:42 +0000 Subject: [PATCH 021/884] Attempt to support subqueries in TTL. (2) --- src/Processors/QueryPlan/CreatingSetsStep.cpp | 29 +++++++++++ src/Processors/QueryPlan/CreatingSetsStep.h | 2 + src/Processors/TTL/ITTLAlgorithm.cpp | 2 +- src/Processors/TTL/ITTLAlgorithm.h | 6 +-- .../TTL/TTLAggregationAlgorithm.cpp | 2 +- src/Processors/TTL/TTLAggregationAlgorithm.h | 2 +- src/Processors/TTL/TTLColumnAlgorithm.cpp | 2 +- src/Processors/TTL/TTLColumnAlgorithm.h | 2 +- src/Processors/TTL/TTLDeleteAlgorithm.cpp | 2 +- src/Processors/TTL/TTLDeleteAlgorithm.h | 2 +- src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp | 2 +- src/Processors/TTL/TTLUpdateInfoAlgorithm.h | 2 +- .../Transforms/TTLCalcTransform.cpp | 33 ++++++++++--- src/Processors/Transforms/TTLCalcTransform.h | 4 ++ src/Processors/Transforms/TTLTransform.cpp | 33 ++++++++++--- src/Processors/Transforms/TTLTransform.h | 5 ++ src/Storages/MergeTree/MergeTask.cpp | 36 +++++++++----- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 49 ++++++++++++++----- src/Storages/StorageInMemoryMetadata.cpp | 8 +-- src/Storages/TTLDescription.cpp | 21 +++++++- src/Storages/TTLDescription.h | 7 +-- 22 files changed, 197 insertions(+), 56 deletions(-) diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 3e4dfb0c7d1..11415e8d815 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -157,6 +157,35 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque query_plan.unitePlans(std::move(creating_sets), std::move(plans)); } +QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipeline, PreparedSets::Subqueries subqueries, ContextPtr context) +{ + DataStreams input_streams; + input_streams.emplace_back(DataStream{pipeline->getHeader()}); + + QueryPipelineBuilders pipelines; + pipelines.reserve(1 + subqueries.size()); + pipelines.push_back(std::move(pipeline)); + + auto plan_settings = QueryPlanOptimizationSettings::fromContext(context); + auto pipeline_settings = BuildQueryPipelineSettings::fromContext(context); + + for (auto & future_set : subqueries) + { + if (future_set->get()) + continue; + + auto plan = future_set->build(context); + if (!plan) + continue; + + input_streams.emplace_back(plan->getCurrentDataStream()); + pipelines.emplace_back(plan->buildQueryPipeline(plan_settings, pipeline_settings)); + } + + CreatingSetsStep(input_streams).updatePipeline(std::move(pipelines), pipeline_settings); + return std::move(pipelines.front()); +} + std::vector> DelayedCreatingSetsStep::makePlansForSets(DelayedCreatingSetsStep && step) { std::vector> plans; diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index a90b70a2fa4..292ec19914c 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -72,4 +72,6 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context); 
+QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipeline, PreparedSets::Subqueries subqueries, ContextPtr context); + } diff --git a/src/Processors/TTL/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp index af6c4e4ac35..761f43e2422 100644 --- a/src/Processors/TTL/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } ITTLAlgorithm::ITTLAlgorithm( - const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) : ttl_expressions(ttl_expressions_) , description(description_) , old_ttl_info(old_ttl_info_) diff --git a/src/Processors/TTL/ITTLAlgorithm.h b/src/Processors/TTL/ITTLAlgorithm.h index 6e73286b564..d79aa8a8dfc 100644 --- a/src/Processors/TTL/ITTLAlgorithm.h +++ b/src/Processors/TTL/ITTLAlgorithm.h @@ -8,7 +8,7 @@ namespace DB { -struct TTlExpressions +struct TTLExpressions { ExpressionActionsPtr expression; ExpressionActionsPtr where_expression; @@ -24,7 +24,7 @@ public: using TTLInfo = IMergeTreeDataPart::TTLInfo; using MutableDataPartPtr = MergeTreeMutableDataPartPtr; - ITTLAlgorithm(const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + ITTLAlgorithm(const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); virtual ~ITTLAlgorithm() = default; virtual void execute(Block & block) = 0; @@ -45,7 +45,7 @@ protected: bool isTTLExpired(time_t ttl) const; UInt32 getTimestampByIndex(const IColumn * column, size_t index) const; - const TTlExpressions ttl_expressions; + const TTLExpressions ttl_expressions; const TTLDescription description; const TTLInfo old_ttl_info; const time_t current_time; diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp index ab2ba5f58fc..0c6184a56e5 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -5,7 +5,7 @@ namespace DB { TTLAggregationAlgorithm::TTLAggregationAlgorithm( - const TTlExpressions & ttl_expressions_, + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.h b/src/Processors/TTL/TTLAggregationAlgorithm.h index 9fd074efba8..f7bf19a202b 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.h +++ b/src/Processors/TTL/TTLAggregationAlgorithm.h @@ -13,7 +13,7 @@ class TTLAggregationAlgorithm final : public ITTLAlgorithm { public: TTLAggregationAlgorithm( - const TTlExpressions & ttl_expressions_, + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, diff --git a/src/Processors/TTL/TTLColumnAlgorithm.cpp b/src/Processors/TTL/TTLColumnAlgorithm.cpp index cb99dcf99b1..e27050564ce 100644 --- a/src/Processors/TTL/TTLColumnAlgorithm.cpp +++ b/src/Processors/TTL/TTLColumnAlgorithm.cpp @@ -4,7 +4,7 @@ namespace DB { TTLColumnAlgorithm::TTLColumnAlgorithm( - const TTlExpressions & ttl_expressions_, + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, diff --git 
a/src/Processors/TTL/TTLColumnAlgorithm.h b/src/Processors/TTL/TTLColumnAlgorithm.h index efcd7c74454..f34dae952d1 100644 --- a/src/Processors/TTL/TTLColumnAlgorithm.h +++ b/src/Processors/TTL/TTLColumnAlgorithm.h @@ -11,7 +11,7 @@ class TTLColumnAlgorithm final : public ITTLAlgorithm { public: TTLColumnAlgorithm( - const TTlExpressions & ttl_expressions_, + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, diff --git a/src/Processors/TTL/TTLDeleteAlgorithm.cpp b/src/Processors/TTL/TTLDeleteAlgorithm.cpp index 6a172e9c3c3..6f9bc315276 100644 --- a/src/Processors/TTL/TTLDeleteAlgorithm.cpp +++ b/src/Processors/TTL/TTLDeleteAlgorithm.cpp @@ -4,7 +4,7 @@ namespace DB { TTLDeleteAlgorithm::TTLDeleteAlgorithm( - const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) : ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_) { if (!isMinTTLExpired()) diff --git a/src/Processors/TTL/TTLDeleteAlgorithm.h b/src/Processors/TTL/TTLDeleteAlgorithm.h index 23389070774..622e45acecb 100644 --- a/src/Processors/TTL/TTLDeleteAlgorithm.h +++ b/src/Processors/TTL/TTLDeleteAlgorithm.h @@ -10,7 +10,7 @@ namespace DB class TTLDeleteAlgorithm final : public ITTLAlgorithm { public: - TTLDeleteAlgorithm(const TTlExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + TTLDeleteAlgorithm(const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); void execute(Block & block) override; void finalize(const MutableDataPartPtr & data_part) const override; diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp index 34c0cad70ea..b7cddf3c165 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp @@ -4,7 +4,7 @@ namespace DB { TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm( - const TTlExpressions & ttl_expressions_, + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLUpdateField ttl_update_field_, const String ttl_update_key_, diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h index e9bcfcdec88..0cf31765aef 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h @@ -20,7 +20,7 @@ class TTLUpdateInfoAlgorithm : public ITTLAlgorithm { public: TTLUpdateInfoAlgorithm( - const TTlExpressions & ttl_expressions_, + const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLUpdateField ttl_update_field_, const String ttl_update_key_, diff --git a/src/Processors/Transforms/TTLCalcTransform.cpp b/src/Processors/Transforms/TTLCalcTransform.cpp index 31fb61239ef..204dfe21733 100644 --- a/src/Processors/Transforms/TTLCalcTransform.cpp +++ b/src/Processors/Transforms/TTLCalcTransform.cpp @@ -4,7 +4,22 @@ namespace DB { +static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context) +{ + auto expr = ttl_descr.buildExpression(context); + auto where_expr = 
ttl_descr.buildWhereExpression(context); + + auto expr_queries = expr.sets->getSubqueries(); + auto where_expr_queries = expr.sets->getSubqueries(); + + subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end()); + subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end()); + + return {expr.expression, where_expr.expression}; +} + TTLCalcTransform::TTLCalcTransform( + const ContextPtr & context, const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -21,33 +36,39 @@ TTLCalcTransform::TTLCalcTransform( { const auto & rows_ttl = metadata_snapshot_->getRowsTTL(); algorithms.emplace_back(std::make_unique( - rows_ttl, TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_)); + getExpressions(rows_ttl, subqueries_for_sets, context), rows_ttl, + TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_)); } for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs()) algorithms.emplace_back(std::make_unique( - where_ttl, TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_)); + getExpressions(where_ttl, subqueries_for_sets, context), where_ttl, + TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_)); for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs()) algorithms.emplace_back(std::make_unique( - group_by_ttl, TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_)); + getExpressions(group_by_ttl, subqueries_for_sets, context), group_by_ttl, + TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_)); if (metadata_snapshot_->hasAnyColumnTTL()) { for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs()) { algorithms.emplace_back(std::make_unique( - description, TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_)); + getExpressions(description, subqueries_for_sets, context), description, + TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_)); } } for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs()) algorithms.emplace_back(std::make_unique( - move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); + getExpressions(move_ttl, subqueries_for_sets, context), move_ttl, + TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs()) algorithms.emplace_back(std::make_unique( - recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); + getExpressions(recompression_ttl, subqueries_for_sets, context), recompression_ttl, + TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); } void TTLCalcTransform::consume(Chunk chunk) diff --git a/src/Processors/Transforms/TTLCalcTransform.h 
b/src/Processors/Transforms/TTLCalcTransform.h index 495879400dc..960438f5f2b 100644 --- a/src/Processors/Transforms/TTLCalcTransform.h +++ b/src/Processors/Transforms/TTLCalcTransform.h @@ -15,6 +15,7 @@ class TTLCalcTransform : public IAccumulatingTransform { public: TTLCalcTransform( + const ContextPtr & context, const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -23,6 +24,8 @@ public: bool force_ ); + PreparedSets::Subqueries getSubqueries() { return std::move(subqueries_for_sets); } + String getName() const override { return "TTL_CALC"; } Status prepare() override; @@ -35,6 +38,7 @@ protected: private: std::vector algorithms; + PreparedSets::Subqueries subqueries_for_sets; /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index d3d45f68d46..69e2e6e5fc0 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -16,7 +16,22 @@ namespace DB { +static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context) +{ + auto expr = ttl_descr.buildExpression(context); + auto where_expr = ttl_descr.buildWhereExpression(context); + + auto expr_queries = expr.sets->getSubqueries(); + auto where_expr_queries = expr.sets->getSubqueries(); + + subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end()); + subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end()); + + return {expr.expression, where_expr.expression}; +} + TTLTransform::TTLTransform( + const ContextPtr & context, const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -33,7 +48,8 @@ TTLTransform::TTLTransform( { const auto & rows_ttl = metadata_snapshot_->getRowsTTL(); auto algorithm = std::make_unique( - rows_ttl, old_ttl_infos.table_ttl, current_time_, force_); + getExpressions(rows_ttl, subqueries_for_sets, context), rows_ttl, + old_ttl_infos.table_ttl, current_time_, force_); /// Skip all data if table ttl is expired for part if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression_ast) @@ -45,11 +61,13 @@ TTLTransform::TTLTransform( for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs()) algorithms.emplace_back(std::make_unique( - where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_)); + getExpressions(where_ttl, subqueries_for_sets, context), where_ttl, + old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_)); for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs()) algorithms.emplace_back(std::make_unique( - group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, + getExpressions(group_by_ttl, subqueries_for_sets, context), group_by_ttl, + old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, getInputPort().getHeader(), storage_)); if (metadata_snapshot_->hasAnyColumnTTL()) @@ -75,18 +93,21 @@ TTLTransform::TTLTransform( } algorithms.emplace_back(std::make_unique( - description, old_ttl_infos.columns_ttl[name], current_time_, + getExpressions(description, subqueries_for_sets, context), description, + old_ttl_infos.columns_ttl[name], current_time_, force_, name, default_expression, default_column_name, 
isCompactPart(data_part))); } } for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs()) algorithms.emplace_back(std::make_unique( - move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); + getExpressions(move_ttl, subqueries_for_sets, context), move_ttl, + TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs()) algorithms.emplace_back(std::make_unique( - recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); + getExpressions(recompression_ttl, subqueries_for_sets, context), recompression_ttl, + TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); } Block reorderColumns(Block block, const Block & header) diff --git a/src/Processors/Transforms/TTLTransform.h b/src/Processors/Transforms/TTLTransform.h index 3f0dffd1998..47da456a2e3 100644 --- a/src/Processors/Transforms/TTLTransform.h +++ b/src/Processors/Transforms/TTLTransform.h @@ -16,6 +16,7 @@ class TTLTransform : public IAccumulatingTransform { public: TTLTransform( + const ContextPtr & context, const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -28,6 +29,8 @@ public: Status prepare() override; + PreparedSets::Subqueries getSubqueries() { return std::move(subqueries_for_sets); } + protected: void consume(Chunk chunk) override; Chunk generate() override; @@ -40,6 +43,8 @@ private: const TTLDeleteAlgorithm * delete_algorithm = nullptr; bool all_data_dropped = false; + PreparedSets::Subqueries subqueries_for_sets; + /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; Poco::Logger * log; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e8e307bb148..26b290d33d5 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -31,6 +31,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -1004,8 +1007,9 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() break; } - auto res_pipe = Pipe::unitePipes(std::move(pipes)); - res_pipe.addTransform(std::move(merged_transform)); + auto builder = std::make_unique(); + builder->init(Pipe::unitePipes(std::move(pipes))); + builder->addTransform(std::move(merged_transform)); if (global_ctx->deduplicate) { @@ -1021,26 +1025,34 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() } if (DistinctSortedTransform::isApplicable(header, sort_description, global_ctx->deduplicate_by_columns)) - res_pipe.addTransform(std::make_shared( - res_pipe.getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns)); + builder->addTransform(std::make_shared( + builder->getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns)); else - res_pipe.addTransform(std::make_shared( - res_pipe.getHeader(), SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns)); + builder->addTransform(std::make_shared( + builder->getHeader(), SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns)); } + PreparedSets::Subqueries 
subqueries; + if (ctx->need_remove_expired_values) - res_pipe.addTransform(std::make_shared( - res_pipe.getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl)); + { + auto transform = std::make_shared(global_ctx->context, builder->getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl); + subqueries = transform->getSubqueries(); + builder->addTransform(std::move(transform)); + } if (global_ctx->metadata_snapshot->hasSecondaryIndices()) { const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices(); - res_pipe.addTransform(std::make_shared( - res_pipe.getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext()))); - res_pipe.addTransform(std::make_shared(res_pipe.getHeader())); + builder->addTransform(std::make_shared( + builder->getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext()))); + builder->addTransform(std::make_shared(builder->getHeader())); } - global_ctx->merged_pipeline = QueryPipeline(std::move(res_pipe)); + if (!subqueries.empty()) + builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), global_ctx->context); + + global_ctx->merged_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); /// Dereference unique_ptr and pass horizontal_stage_progress by reference global_ctx->merged_pipeline.setProgressCallback(MergeProgressCallback(global_ctx->merge_list_element_ptr, global_ctx->watch_prev_elapsed, *global_ctx->horizontal_stage_progress)); /// Is calculated inside MergeProgressCallback. diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index d080240b066..ce9e5762cb4 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -132,7 +132,7 @@ void updateTTL( const Block & block, bool update_part_min_max_ttls) { - auto expr_and_set = ttl_entry.buildExpression(); + auto expr_and_set = ttl_entry.buildExpression(context); for (auto & subquery : expr_and_set.sets->getSubqueries()) subquery->buildSetInplace(context); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6b6b5947581..61849f94e44 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1507,21 +1509,34 @@ private: if (!ctx->mutating_pipeline_builder.initialized()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot mutate part columns with uninitialized mutations stream. 
It's a bug"); - QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder)); + auto builder = std::make_unique(std::move(ctx->mutating_pipeline_builder)); if (ctx->metadata_snapshot->hasPrimaryKey() || ctx->metadata_snapshot->hasSecondaryIndices()) { - builder.addTransform(std::make_shared( - builder.getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot, skip_indices))); + builder->addTransform(std::make_shared( + builder->getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot, skip_indices))); - builder.addTransform(std::make_shared(builder.getHeader())); + builder->addTransform(std::make_shared(builder->getHeader())); } + PreparedSets::Subqueries subqueries; + if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL) - builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); + { + auto transform = std::make_shared(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + subqueries = transform->getSubqueries(); + builder->addTransform(std::move(transform)); + } if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE) - builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); + { + auto transform = std::make_shared(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + subqueries = transform->getSubqueries(); + builder->addTransform(std::move(transform)); + } + + if (!subqueries.empty()) + builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), ctx->context); ctx->minmax_idx = std::make_shared(); @@ -1537,7 +1552,7 @@ private: /*blocks_are_granules_size=*/ false, ctx->context->getWriteSettings()); - ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback); /// Is calculated inside MergeProgressCallback. 
ctx->mutating_pipeline.disableProfileEventUpdate(); @@ -1712,13 +1727,25 @@ private: if (ctx->mutating_pipeline_builder.initialized()) { - QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder)); + auto builder = std::make_unique(std::move(ctx->mutating_pipeline_builder)); + PreparedSets::Subqueries subqueries; if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL) - builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); + { + auto transform = std::make_shared(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + subqueries = transform->getSubqueries(); + builder->addTransform(std::move(transform)); + } if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE) - builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); + { + auto transform = std::make_shared(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + subqueries = transform->getSubqueries(); + builder->addTransform(std::move(transform)); + } + + if (!subqueries.empty()) + builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), ctx->context); ctx->out = std::make_shared( ctx->new_data_part, @@ -1732,7 +1759,7 @@ private: &ctx->source_part->index_granularity_info ); - ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback); /// Is calculated inside MergeProgressCallback. ctx->mutating_pipeline.disableProfileEventUpdate(); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 7db5af82e0b..158c13b653d 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -279,7 +279,7 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( auto add_for_rows_ttl = [&](const auto & expression, auto & to_set) { - if (add_dependent_columns(expression, to_set) && include_ttl_target) + if (add_dependent_columns(expression.getNames(), to_set) && include_ttl_target) { /// Filter all columns, if rows TTL expression have to be recalculated. for (const auto & column : getColumns().getAllPhysical()) @@ -297,16 +297,16 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( add_for_rows_ttl(entry.expression_columns, required_ttl_columns); for (const auto & entry : getRecompressionTTLs()) - add_dependent_columns(entry.expression_columns, required_ttl_columns); + add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns); for (const auto & [name, entry] : getColumnTTLs()) { - if (add_dependent_columns(entry.expression_columns, required_ttl_columns) && include_ttl_target) + if (add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns) && include_ttl_target) updated_ttl_columns.insert(name); } for (const auto & entry : getMoveTTLs()) - add_dependent_columns(entry.expression_columns, required_ttl_columns); + add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns); //TODO what about rows_where_ttl and group_by_ttl ?? 
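For orientation, the TTL changes in this patch target TTL expressions whose WHERE clause contains an IN-subquery: the subqueries collected into PreparedSets above have to be built (via addCreatingSetsTransform) before the TTL transforms can evaluate the expression. A minimal sketch of a table of that shape, modeled on the 02932_set_ttl_where test added later in this series (the table and column names here are purely illustrative, not part of the patch), is:

```sql
-- Illustrative only: a TTL whose WHERE clause needs a prepared set built from a subquery.
CREATE TABLE t_ttl_where_example
(
    a UInt32,
    timestamp DateTime
)
ENGINE = MergeTree
ORDER BY a
TTL timestamp + INTERVAL 2 SECOND WHERE a IN (SELECT number FROM system.numbers LIMIT 100_000);
```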
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 47138f30e4f..e02ac933028 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -169,6 +169,23 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType return result; } +ExpressionAndSets TTLDescription::buildExpression(const ContextPtr & context) const +{ + auto ast = expression_ast->clone(); + return buildExpressionAndSets(ast, expression_columns, context); +} + +ExpressionAndSets TTLDescription::buildWhereExpression(const ContextPtr & context) const +{ + if (where_expression_ast) + { + auto ast = where_expression_ast->clone(); + return buildExpressionAndSets(ast, where_expression_columns, context); + } + + return {}; +} + TTLDescription TTLDescription::getTTLFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, @@ -186,7 +203,7 @@ TTLDescription TTLDescription::getTTLFromAST( auto ttl_ast = result.expression_ast->clone(); auto expression = buildExpressionAndSets(ttl_ast, columns.getAllPhysical(), context).expression; - result.expression_columns = expression->getRequiredColumns(); + result.expression_columns = expression->getRequiredColumnsWithTypes(); // auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical()); // result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false); @@ -214,6 +231,8 @@ TTLDescription TTLDescription::getTTLFromAST( where_expression = buildExpressionAndSets(where_expr_ast, columns.getAllPhysical(), context).expression; // auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical()); // result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false); + + result.where_expression_columns = where_expression->getRequiredColumnsWithTypes(); result.where_result_column = where_expr_ast->getColumnName(); } } diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 5ea243424cb..7dfc736ded2 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -51,10 +51,10 @@ struct TTLDescription /// TTL d + INTERVAL 1 DAY /// ^~~~~~~~~~~~~~~~~~~^ ASTPtr expression_ast; - Names expression_columns; + NamesAndTypesList expression_columns; /// Expression actions evaluated from AST - ExpressionAndSets buildExpression() const; + ExpressionAndSets buildExpression(const ContextPtr & context) const; /// Result column of this TTL expression String result_column; @@ -63,7 +63,8 @@ struct TTLDescription /// TTL ... 
WHERE x % 10 == 0 and y > 5 /// ^~~~~~~~~~~~~~~~~~~~~~^ ASTPtr where_expression_ast; - ExpressionAndSets buildWhereExpression() const; + NamesAndTypesList where_expression_columns; + ExpressionAndSets buildWhereExpression(const ContextPtr & context) const; /// Name of result column from WHERE expression String where_result_column; From 16558ccc840d7a15efb2ab0fe691a79c38dd5086 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 4 Dec 2023 18:13:34 +0000 Subject: [PATCH 022/884] Fix some tests --- src/Storages/TTLDescription.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index e02ac933028..e32ff11860b 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -103,7 +103,10 @@ using FindAggregateFunctionVisitor = InDepthNodeVisitorclone() : nullptr) + , expression_columns(other.expression_columns) , result_column(other.result_column) + , where_expression_ast(other.where_expression_ast ? other.where_expression_ast->clone() : nullptr) + , where_expression_columns(other.where_expression_columns) , where_result_column(other.where_result_column) , group_by_keys(other.group_by_keys) , set_parts(other.set_parts) @@ -136,12 +139,20 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) // else // expression.reset(); + expression_columns = other.expression_columns; result_column = other.result_column; + + if (other.where_expression_ast) + where_expression_ast = other.where_expression_ast->clone(); + else + where_expression_ast.reset(); + // if (other.where_expression) // where_expression = other.where_expression->clone(); // else // where_expression.reset(); + where_expression_columns = other.where_expression_columns; where_result_column = other.where_result_column; group_by_keys = other.group_by_keys; set_parts = other.set_parts; From 6a821f9e737373b28bc98f25e10439dd04e7bdb8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 4 Dec 2023 19:24:27 +0000 Subject: [PATCH 023/884] Fix some staff --- src/Processors/QueryPlan/CreatingSetsStep.cpp | 3 +-- src/Processors/Transforms/TTLCalcTransform.cpp | 12 +++++++----- src/Processors/Transforms/TTLTransform.cpp | 12 +++++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 11415e8d815..f13a717004f 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -182,8 +182,7 @@ QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipelin pipelines.emplace_back(plan->buildQueryPipeline(plan_settings, pipeline_settings)); } - CreatingSetsStep(input_streams).updatePipeline(std::move(pipelines), pipeline_settings); - return std::move(pipelines.front()); + return CreatingSetsStep(input_streams).updatePipeline(std::move(pipelines), pipeline_settings); } std::vector> DelayedCreatingSetsStep::makePlansForSets(DelayedCreatingSetsStep && step) diff --git a/src/Processors/Transforms/TTLCalcTransform.cpp b/src/Processors/Transforms/TTLCalcTransform.cpp index 204dfe21733..0af9f38b20f 100644 --- a/src/Processors/Transforms/TTLCalcTransform.cpp +++ b/src/Processors/Transforms/TTLCalcTransform.cpp @@ -7,13 +7,15 @@ namespace DB static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context) { auto expr = ttl_descr.buildExpression(context); - auto where_expr = 
ttl_descr.buildWhereExpression(context); - auto expr_queries = expr.sets->getSubqueries(); - auto where_expr_queries = expr.sets->getSubqueries(); - subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end()); - subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end()); + + auto where_expr = ttl_descr.buildWhereExpression(context); + if (where_expr.sets) + { + auto where_expr_queries = where_expr.sets->getSubqueries(); + subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end()); + } return {expr.expression, where_expr.expression}; } diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index 69e2e6e5fc0..69b7d80c563 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -19,13 +19,15 @@ namespace DB static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context) { auto expr = ttl_descr.buildExpression(context); - auto where_expr = ttl_descr.buildWhereExpression(context); - auto expr_queries = expr.sets->getSubqueries(); - auto where_expr_queries = expr.sets->getSubqueries(); - subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end()); - subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end()); + + auto where_expr = ttl_descr.buildWhereExpression(context); + if (where_expr.sets) + { + auto where_expr_queries = where_expr.sets->getSubqueries(); + subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end()); + } return {expr.expression, where_expr.expression}; } From 0015ec28f9f70548c31e220f2dd826e4ac21f007 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 5 Dec 2023 12:45:25 +0000 Subject: [PATCH 024/884] Fixing test. 
--- src/Storages/TTLDescription.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index e32ff11860b..bfd3afc30d8 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace DB @@ -172,11 +173,26 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndTypesList & columns, const ContextPtr & context) { ExpressionAndSets result; + auto ttl_string = queryToString(ast); auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, columns); ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context); - result.expression = analyzer.getActions(false); + auto dag = analyzer.getActionsDAG(false); + + const auto * col = &dag->findInOutputs(ast->getColumnName()); + // std::cerr << "buildExpressionAndSets " << ttl_string << std::endl; + if (col->result_name != ttl_string) + col = &dag->addAlias(*col, ttl_string); + + dag->getOutputs() = {col}; + dag->removeUnusedActions(); + + result.expression = std::make_shared(dag, ExpressionActionsSettings::fromContext(context)); result.sets = analyzer.getPreparedSets(); + // std::cerr << "--------- buildExpressionAndSets\n"; + // std::cerr << result.expression->dumpActions() << std::endl; + // std::cerr << result.sets->getSubqueries().size() << std::endl; + return result; } @@ -218,7 +234,7 @@ TTLDescription TTLDescription::getTTLFromAST( // auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical()); // result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false); - result.result_column = ttl_ast->getColumnName(); + result.result_column = expression->getSampleBlock().safeGetByPosition(0).name; ExpressionActionsPtr where_expression; @@ -244,7 +260,7 @@ TTLDescription TTLDescription::getTTLFromAST( // result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false); result.where_expression_columns = where_expression->getRequiredColumnsWithTypes(); - result.where_result_column = where_expr_ast->getColumnName(); + result.where_result_column = where_expression->getSampleBlock().safeGetByPosition(0).name; } } else if (ttl_element->mode == TTLMode::GROUP_BY) From 43a23898e0ddb71fe810dafd850cef911dace902 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 5 Dec 2023 14:20:07 +0000 Subject: [PATCH 025/884] Updating the tests. 
--- .../0_stateless/01465_ttl_recompression.reference | 6 +++--- .../queries/0_stateless/02932_set_ttl_where.reference | 3 +++ tests/queries/0_stateless/02932_set_ttl_where.sql | 10 +--------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/01465_ttl_recompression.reference b/tests/queries/0_stateless/01465_ttl_recompression.reference index 108df565669..90661a5dc78 100644 --- a/tests/queries/0_stateless/01465_ttl_recompression.reference +++ b/tests/queries/0_stateless/01465_ttl_recompression.reference @@ -13,9 +13,9 @@ CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt6 1_1_1 LZ4 2_2_2 ZSTD(12) 3_3_3 ZSTD(12) -1_1_1 ['plus(dt, toIntervalDay(1))'] -2_2_2 ['plus(dt, toIntervalDay(1))'] -3_3_3 ['plus(dt, toIntervalDay(1))'] +1_1_1 ['dt + toIntervalDay(1)'] +2_2_2 ['dt + toIntervalDay(1)'] +3_3_3 ['dt + toIntervalDay(1)'] 1_1_1 LZ4 2_2_2 LZ4 3_3_3 LZ4 diff --git a/tests/queries/0_stateless/02932_set_ttl_where.reference b/tests/queries/0_stateless/02932_set_ttl_where.reference index e69de29bb2d..bb0b1cf658d 100644 --- a/tests/queries/0_stateless/02932_set_ttl_where.reference +++ b/tests/queries/0_stateless/02932_set_ttl_where.reference @@ -0,0 +1,3 @@ +0 +0 +0 diff --git a/tests/queries/0_stateless/02932_set_ttl_where.sql b/tests/queries/0_stateless/02932_set_ttl_where.sql index 85fddf613e8..bf2b317c4bf 100644 --- a/tests/queries/0_stateless/02932_set_ttl_where.sql +++ b/tests/queries/0_stateless/02932_set_ttl_where.sql @@ -1,18 +1,10 @@ -create or replace table temp ( - a UInt32 -) -engine = MergeTree -order by a; - -insert into temp select number from system.numbers limit 100_000; - create or replace table t_temp ( a UInt32, timestamp DateTime ) engine = MergeTree order by a -TTL timestamp + INTERVAL 2 SECOND WHERE a in (select a from temp); +TTL timestamp + INTERVAL 2 SECOND WHERE a in (select number from system.numbers limit 100_000); select sleep(1); insert into t_temp select rand(), now() from system.numbers limit 1_000_000; From 7dc7062dadd5ddf3bed3dea4364cabfa97bcd61a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 6 Dec 2023 12:53:14 +0000 Subject: [PATCH 026/884] Fixing test. 
--- src/Interpreters/PreparedSets.cpp | 3 ++- src/Interpreters/Set.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index ea8d9a62b8b..9f646825d9f 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -189,7 +189,8 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) } } - set_and_key->set->fillSetElements(); + if (!set_and_key->set->hasSetElements()) + set_and_key->set->fillSetElements(); return buildSetInplace(context); } diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 7136b090c42..7e8e0f2371b 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -77,6 +77,7 @@ public: const DataTypes & getElementsTypes() const { return set_elements_types; } bool hasExplicitSetElements() const { return fill_set_elements || (!set_elements.empty() && set_elements.front()->size() == data.getTotalRowCount()); } + bool hasSetElements() const { return !set_elements.empty(); } Columns getSetElements() const { checkIsCreated(); return { set_elements.begin(), set_elements.end() }; } void checkColumnsNumber(size_t num_key_columns) const; From 59153e865d4ffeda3c67cbdd945e14fdc860e446 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Tue, 19 Dec 2023 09:53:04 +0000 Subject: [PATCH 027/884] materialize column not to override past values Signed-off-by: Duc Canh Le --- src/Storages/MergeTree/MutateTask.cpp | 23 +++++++-- .../0_stateless/02008_materialize_column.sql | 1 + ..._column_not_override_past_values.reference | 29 +++++++++++ ...ialize_column_not_override_past_values.sql | 49 +++++++++++++++++++ 4 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference create mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 827749aa094..a04d9cdb886 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -65,6 +65,7 @@ static void splitAndModifyMutationCommands( Poco::Logger * log) { auto part_columns = part->getColumnsDescription(); + const auto & table_columns = metadata_snapshot->getColumns(); if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { @@ -73,9 +74,16 @@ static void splitAndModifyMutationCommands( for (const auto & command : commands) { + if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) + { + /// For ordinary column with default expression, materialize column should not override past values + /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); + if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + mutated_columns.emplace(command.column_name); + } if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC - || command.type == MutationCommand::Type::MATERIALIZE_COLUMN || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE @@ -85,8 +93,6 @@ static void splitAndModifyMutationCommands( for (const auto & [column_name, expr] : 
command.column_to_update_expression) mutated_columns.emplace(column_name); - if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) - mutated_columns.emplace(command.column_name); } else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION @@ -196,8 +202,15 @@ static void splitAndModifyMutationCommands( { for (const auto & command : commands) { - if (command.type == MutationCommand::Type::MATERIALIZE_INDEX - || command.type == MutationCommand::Type::MATERIALIZE_COLUMN + if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) + { + /// For ordinary column with default expression, materialize column should not override past values + /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); + if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + for_interpreter.push_back(command); + } + else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL diff --git a/tests/queries/0_stateless/02008_materialize_column.sql b/tests/queries/0_stateless/02008_materialize_column.sql index a78920d2525..cc7d3096402 100644 --- a/tests/queries/0_stateless/02008_materialize_column.sql +++ b/tests/queries/0_stateless/02008_materialize_column.sql @@ -17,6 +17,7 @@ ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2); SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; +ALTER TABLE tmp CLEAR COLUMN s; -- Need to clear because MATERIALIZE COLUMN won't override past values; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3); SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference new file mode 100644 index 00000000000..6b0d88bd09b --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference @@ -0,0 +1,29 @@ +--Origin-- +1 2 +2 54321 +--After materialize-- +1 2 +2 54321 +--Origin-- +1 2 +2 54321 +--After materialize-- +1 2 +2 54321 +--Origin-- +1 2 +2 \N +3 54321 +--After materialize-- +1 2 +2 \N +3 54321 +--Origin-- +1 2 +2 54321 +--After rename-- +1 2 +2 54321 +--After materialize-- +1 2 +2 54321 diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql new file mode 100644 index 00000000000..1815661e097 --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql @@ -0,0 +1,49 @@ + +SET mutations_sync = 2; +-- Compact parts +CREATE TABLE test (id Int64, foo Int64 default 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id ) values ( 2 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN foo; +SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; + +-- Wide parts +CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE 
MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id ) values ( 2 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN foo; +SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; + +-- Nullable column != physically absent +CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id, foo ) values ( 2, NULL ); +INSERT INTO test ( id ) values ( 3 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN foo; +SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; + +-- Parts with renamed column +CREATE TABLE test (id Int64, foo Int64 default 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id ) values ( 2 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test RENAME COLUMN foo TO bar; +SELECT '--After rename--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN bar; +SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; \ No newline at end of file From a924b01a023512727d6a36fc12052f67438ba199 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 19 Dec 2023 02:05:32 -0800 Subject: [PATCH 028/884] [Docs] Clarify to use query level settings in ClickHouse Cloud --- docs/en/operations/query-cache.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index def0f48b968..2f05599e666 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -31,6 +31,10 @@ This reduces maintenance effort and avoids redundancy. ## Configuration Settings and Usage +:::note +In ClickHouse Cloud, you must use [query level settings](/en/operations/settings/query-level) to edit query cache settings. Editing [config level settings](/en/operations/configuration-files) is currently not supported. +::: + Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the current session should utilize the query cache. 
For example, the first execution of query From e832599dfab7ba2304a4a00175ce48f6a63ed701 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 20 Dec 2023 04:57:56 +0000 Subject: [PATCH 029/884] fix materialize column for compact parts Signed-off-by: Duc Canh Le --- src/Storages/MergeTree/MutateTask.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a04d9cdb886..dd84aa0d98a 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -80,7 +80,11 @@ static void splitAndModifyMutationCommands( /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + { + LOG_DEBUG(log, "Materializing column {}\n", command.column_name); + for_interpreter.push_back(command); mutated_columns.emplace(command.column_name); + } } if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC @@ -92,7 +96,6 @@ static void splitAndModifyMutationCommands( for_interpreter.push_back(command); for (const auto & [column_name, expr] : command.column_to_update_expression) mutated_columns.emplace(column_name); - } else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION From 7b49a0e530e2a2cb8629c249b96f43c6554ea51d Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 20 Dec 2023 04:59:03 +0000 Subject: [PATCH 030/884] remove junk log Signed-off-by: Duc Canh Le --- src/Storages/MergeTree/MutateTask.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index dd84aa0d98a..bb41608eb00 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -81,7 +81,6 @@ static void splitAndModifyMutationCommands( auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) { - LOG_DEBUG(log, "Materializing column {}\n", command.column_name); for_interpreter.push_back(command); mutated_columns.emplace(command.column_name); } From bc757559c9f3fd1943bf338dc4fdac9e0e61240a Mon Sep 17 00:00:00 2001 From: una Date: Sat, 23 Dec 2023 18:10:42 +0800 Subject: [PATCH 031/884] feat:add InitialQuery event --- src/Common/ProfileEvents.cpp | 1 + src/Databases/DatabaseReplicatedWorker.cpp | 7 +++++-- src/Interpreters/DDLWorker.cpp | 2 +- .../queries/0_stateless/02950_initialquery_event.reference | 1 + tests/queries/0_stateless/02950_initialquery_event.sql | 1 + 5 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02950_initialquery_event.reference create mode 100644 tests/queries/0_stateless/02950_initialquery_event.sql diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index f342a19b2aa..a2dc7f5ecd6 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -8,6 +8,7 @@ M(Query, "Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. 
May include internal queries initiated by ClickHouse itself. Does not count subqueries.") \ M(SelectQuery, "Same as Query, but only for SELECT queries.") \ M(InsertQuery, "Same as Query, but only for INSERT queries.") \ + M(InitialQuery, "Same as Query, but only counts initial queries (see is_initial_query).")\ M(QueriesWithSubqueries, "Count queries with all subqueries") \ M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \ M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \ diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 2056b403ff6..c90af7d4ea8 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -6,7 +6,10 @@ #include namespace fs = std::filesystem; - +namespace ProfileEvents +{ + extern const Event InitialQuery; +} namespace DB { @@ -264,7 +267,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr chassert(!task->entry.query.empty()); assert(!zookeeper->exists(task->getFinishedNodePath())); task->is_initial_query = true; - + ProfileEvents::increment(ProfileEvents::InitialQuery); LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name); UInt64 timeout = query_context->getSettingsRef().database_replicated_initial_query_timeout_sec; { diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index f08fd72ff7f..ac3af6e441c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -490,7 +490,7 @@ bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep if (!task.is_initial_query) query_scope.emplace(query_context); - + executeQuery(istr, ostr, !task.is_initial_query, query_context, {}, QueryFlags{ .internal = false, .distributed_backup_restore = task.entry.is_backup_restore }); if (auto txn = query_context->getZooKeeperMetadataTransaction()) diff --git a/tests/queries/0_stateless/02950_initialquery_event.reference b/tests/queries/0_stateless/02950_initialquery_event.reference new file mode 100644 index 00000000000..7ad67a1e7e4 --- /dev/null +++ b/tests/queries/0_stateless/02950_initialquery_event.reference @@ -0,0 +1 @@ +InitialQuery 6 Same as Query, but only counts initial queries (see is_initial_query). 
diff --git a/tests/queries/0_stateless/02950_initialquery_event.sql b/tests/queries/0_stateless/02950_initialquery_event.sql new file mode 100644 index 00000000000..2b03607c5c7 --- /dev/null +++ b/tests/queries/0_stateless/02950_initialquery_event.sql @@ -0,0 +1 @@ +SELECT * FROM system.events where event = 'InitialQuery' \ No newline at end of file From b38e7060ef455e6ae569d371203309a1ad992c66 Mon Sep 17 00:00:00 2001 From: una Date: Sat, 23 Dec 2023 18:36:23 +0800 Subject: [PATCH 032/884] feat:add InitialQuery event --- src/Common/ProfileEvents.cpp | 1 + src/Databases/DatabaseReplicatedWorker.cpp | 7 +++++-- src/Interpreters/DDLWorker.cpp | 2 +- .../queries/0_stateless/02950_initialquery_event.reference | 1 + tests/queries/0_stateless/02950_initialquery_event.sql | 1 + 5 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02950_initialquery_event.reference create mode 100644 tests/queries/0_stateless/02950_initialquery_event.sql diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index f342a19b2aa..a2dc7f5ecd6 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -8,6 +8,7 @@ M(Query, "Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries.") \ M(SelectQuery, "Same as Query, but only for SELECT queries.") \ M(InsertQuery, "Same as Query, but only for INSERT queries.") \ + M(InitialQuery, "Same as Query, but only counts initial queries (see is_initial_query).")\ M(QueriesWithSubqueries, "Count queries with all subqueries") \ M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \ M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \ diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 2056b403ff6..c90af7d4ea8 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -6,7 +6,10 @@ #include namespace fs = std::filesystem; - +namespace ProfileEvents +{ + extern const Event InitialQuery; +} namespace DB { @@ -264,7 +267,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr chassert(!task->entry.query.empty()); assert(!zookeeper->exists(task->getFinishedNodePath())); task->is_initial_query = true; - + ProfileEvents::increment(ProfileEvents::InitialQuery); LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name); UInt64 timeout = query_context->getSettingsRef().database_replicated_initial_query_timeout_sec; { diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index f08fd72ff7f..ac3af6e441c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -490,7 +490,7 @@ bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep if (!task.is_initial_query) query_scope.emplace(query_context); - + executeQuery(istr, ostr, !task.is_initial_query, query_context, {}, QueryFlags{ .internal = false, .distributed_backup_restore = task.entry.is_backup_restore }); if (auto txn = query_context->getZooKeeperMetadataTransaction()) diff --git a/tests/queries/0_stateless/02950_initialquery_event.reference b/tests/queries/0_stateless/02950_initialquery_event.reference new file mode 100644 index 
00000000000..7ad67a1e7e4 --- /dev/null +++ b/tests/queries/0_stateless/02950_initialquery_event.reference @@ -0,0 +1 @@ +InitialQuery 6 Same as Query, but only counts initial queries (see is_initial_query). diff --git a/tests/queries/0_stateless/02950_initialquery_event.sql b/tests/queries/0_stateless/02950_initialquery_event.sql new file mode 100644 index 00000000000..2b03607c5c7 --- /dev/null +++ b/tests/queries/0_stateless/02950_initialquery_event.sql @@ -0,0 +1 @@ +SELECT * FROM system.events where event = 'InitialQuery' \ No newline at end of file From 3e22f29b4529b6fefd5e92616ce9ef1ac33966d0 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 23 Dec 2023 11:40:58 +0100 Subject: [PATCH 033/884] Fixed parameters --- docs/en/operations/backup.md | 2 +- .../registerBackupEngineAzureBlobStorage.cpp | 25 +++++++++++++++---- .../test.py | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 15d953249a0..4871f97c270 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -463,7 +463,7 @@ To write backups to an AzureBlobStorage container you need the following pieces The destination for a backup will be specified like this: ``` -AzureBlobStorage('/', '', '', '', ') +AzureBlobStorage('/', '', '', '', '') ``` ```sql diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 6f7b5f38c28..ef95206831f 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int SUPPORT_IS_DISABLED; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } #if USE_AZURE_BLOB_STORAGE @@ -54,20 +55,34 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) StorageAzureBlob::Configuration configuration; - if (args.size() == 4) + if (args.size() == 3) { configuration.connection_url = args[0].safeGet(); configuration.is_connection_string = true; configuration.container = args[1].safeGet(); configuration.blob_path = args[2].safeGet(); - configuration.format = args[3].safeGet(); LOG_TRACE(&Poco::Logger::get("registerBackupEngineAzureBlobStorage"), "configuration.connection_url = {}" "configuration.container = {}" - "configuration.blob_path = {}" - "configuration.format = {}", - configuration.connection_url, configuration.container, configuration.blob_path, configuration.format); + "configuration.blob_path = {}", + configuration.connection_url, configuration.container, configuration.blob_path); + } + else if (args.size() == 5) + { + configuration.connection_url = args[0].safeGet(); + configuration.is_connection_string = false; + + configuration.container = args[1].safeGet(); + configuration.blob_path = args[2].safeGet(); + configuration.account_name = args[3].safeGet(); + configuration.account_key = args[4].safeGet(); + + } + else + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Backup AzureBlobStorage requires 3 or 5 arguments: connection string>/ Date: Sat, 23 Dec 2023 18:42:41 +0800 Subject: [PATCH 034/884] feat:add InitialQuery event --- src/Interpreters/DDLWorker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index ac3af6e441c..f08fd72ff7f 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -490,7 +490,7 @@ bool 
DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep if (!task.is_initial_query) query_scope.emplace(query_context); - + executeQuery(istr, ostr, !task.is_initial_query, query_context, {}, QueryFlags{ .internal = false, .distributed_backup_restore = task.entry.is_backup_restore }); if (auto txn = query_context->getZooKeeperMetadataTransaction()) From fa5dde0bff8f34ebe85e1cc6e929f834c5e6b496 Mon Sep 17 00:00:00 2001 From: una Date: Wed, 27 Dec 2023 12:37:06 +0800 Subject: [PATCH 035/884] feat: Add initial query event --- src/Databases/DatabaseReplicatedWorker.cpp | 6 +-- src/Interpreters/InterpreterFactory.cpp | 5 +- ..._distributed_initial_query_event.reference | 6 +++ .../02950_distributed_initial_query_event.sh | 54 +++++++++++++++++++ .../02950_initialquery_event.reference | 1 - .../0_stateless/02950_initialquery_event.sql | 1 - 6 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02950_distributed_initial_query_event.reference create mode 100644 tests/queries/0_stateless/02950_distributed_initial_query_event.sh delete mode 100644 tests/queries/0_stateless/02950_initialquery_event.reference delete mode 100644 tests/queries/0_stateless/02950_initialquery_event.sql diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index c90af7d4ea8..317cda3cd3d 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -7,9 +7,7 @@ namespace fs = std::filesystem; namespace ProfileEvents -{ - extern const Event InitialQuery; -} + namespace DB { @@ -267,7 +265,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr chassert(!task->entry.query.empty()); assert(!zookeeper->exists(task->getFinishedNodePath())); task->is_initial_query = true; - ProfileEvents::increment(ProfileEvents::InitialQuery); + LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name); UInt64 timeout = query_context->getSettingsRef().database_replicated_initial_query_timeout_sec; { diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index e32cbe4ccad..fdf7e8ebfbb 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -120,6 +120,7 @@ namespace ProfileEvents { extern const Event Query; + extern const Event InitialQuery; extern const Event QueriesWithSubqueries; extern const Event SelectQuery; extern const Event InsertQuery; @@ -137,7 +138,9 @@ namespace ErrorCodes std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMutablePtr context, const SelectQueryOptions & options) { ProfileEvents::increment(ProfileEvents::Query); - + + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + ProfileEvents::increment(ProfileEvents::InitialQuery); /// SELECT and INSERT query will handle QueriesWithSubqueries on their own. 
if (!(query->as() || query->as() || diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.reference b/tests/queries/0_stateless/02950_distributed_initial_query_event.reference new file mode 100644 index 00000000000..af8542c7204 --- /dev/null +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.reference @@ -0,0 +1,6 @@ +Local situation +Initial Query Difference: 1 +Query Difference: 1 +Distributed situation +Initial Query Difference: 1 +Query Difference: 2 diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh new file mode 100644 index 00000000000..3a01aa63d87 --- /dev/null +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh @@ -0,0 +1,54 @@ +-- Tags: distributed + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh +# CREATE TABLE local (x UInt8) Engine=Memory; +# CREATE TABLE distributed ON CLUSTER cluster (p Date, i Int32) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), x) +$CLICKHOUSE_CLIENT -n -q " +DROP TABLE IF EXISTS local; +DROP TABLE IF EXISTS distributed; +CREATE TABLE local (x UInt8) Engine=Memory; +CREATE TABLE distributed AS local ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), local, x); +INSERT INTO distributed SELECT number FROM numbers(10); +SYSTEM FLUSH DISTRIBUTED distributed; +" +echo "Local situation" +# before SELECT * FROM local +query_countI=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'InitialQuery'") +query_countQ=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'Query'") + +# Execute SELECT * FROM local +$CLICKHOUSE_CLIENT -q "SELECT * FROM local" > /dev/null + +# Counts after SELECT * FROM local +After_query_countI=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'InitialQuery'") +After_query_countQ=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'Query'") + +# Calculate the differences +Initial_query_diff=$(($After_query_countI-$query_countI-2)) +query_diff=$(($After_query_countQ-$query_countQ-2)) + +echo "Initial Query Difference: $Initial_query_diff" +echo "Query Difference: $query_diff" +echo "Distributed situation" + +# before SELECT * FROM distributed +query_countI=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'InitialQuery'") +query_countQ=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'Query'") + +# Execute SELECT * FROM distributed +$CLICKHOUSE_CLIENT -q "SELECT * FROM distributed" > /dev/null + +# Counts after SELECT * FROM distributed +After_query_countI=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'InitialQuery'") +After_query_countQ=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'Query'") + +# Calculate the differences +Initial_query_diff=$(($After_query_countI-$query_countI-2)) +query_diff=$(($After_query_countQ-$query_countQ-2)) + +echo "Initial Query Difference: $Initial_query_diff" +echo "Query Difference: $query_diff" + + diff --git a/tests/queries/0_stateless/02950_initialquery_event.reference b/tests/queries/0_stateless/02950_initialquery_event.reference deleted file mode 100644 index 7ad67a1e7e4..00000000000 --- a/tests/queries/0_stateless/02950_initialquery_event.reference +++ /dev/null @@ -1 +0,0 @@ -InitialQuery 6 Same as Query, but only counts initial queries (see is_initial_query). 
diff --git a/tests/queries/0_stateless/02950_initialquery_event.sql b/tests/queries/0_stateless/02950_initialquery_event.sql deleted file mode 100644 index 2b03607c5c7..00000000000 --- a/tests/queries/0_stateless/02950_initialquery_event.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT * FROM system.events where event = 'InitialQuery' \ No newline at end of file From 1464c3d1aab8c6ecdc369facceb1b9f6cf4b36fb Mon Sep 17 00:00:00 2001 From: una Date: Wed, 27 Dec 2023 15:13:21 +0800 Subject: [PATCH 036/884] feat: Add initial query event --- src/Databases/DatabaseReplicatedWorker.cpp | 3 +-- .../02950_distributed_initial_query_event.reference | 2 +- .../0_stateless/02950_distributed_initial_query_event.sh | 7 +++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 317cda3cd3d..2056b403ff6 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -6,7 +6,6 @@ #include namespace fs = std::filesystem; -namespace ProfileEvents namespace DB { @@ -265,7 +264,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr chassert(!task->entry.query.empty()); assert(!zookeeper->exists(task->getFinishedNodePath())); task->is_initial_query = true; - + LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name); UInt64 timeout = query_context->getSettingsRef().database_replicated_initial_query_timeout_sec; { diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.reference b/tests/queries/0_stateless/02950_distributed_initial_query_event.reference index af8542c7204..cf10427e9b3 100644 --- a/tests/queries/0_stateless/02950_distributed_initial_query_event.reference +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.reference @@ -3,4 +3,4 @@ Initial Query Difference: 1 Query Difference: 1 Distributed situation Initial Query Difference: 1 -Query Difference: 2 +Query Difference: 3 diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh index 3a01aa63d87..c8a955c4fe5 100644 --- a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh @@ -1,4 +1,5 @@ --- Tags: distributed +#!/usr/bin/env bash +# Tags:no-parallel shard CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -38,7 +39,7 @@ query_countI=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE even query_countQ=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'Query'") # Execute SELECT * FROM distributed -$CLICKHOUSE_CLIENT -q "SELECT * FROM distributed" > /dev/null +$CLICKHOUSE_CLIENT -q "SELECT * FROM distributed SETTINGS prefer_localhost_replica = 0" > /dev/null # Counts after SELECT * FROM distributed After_query_countI=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.events WHERE event = 'InitialQuery'") @@ -50,5 +51,3 @@ query_diff=$(($After_query_countQ-$query_countQ-2)) echo "Initial Query Difference: $Initial_query_diff" echo "Query Difference: $query_diff" - - From 22e1bcb9d638d5df0c43585b1d78228beedb0dc8 Mon Sep 17 00:00:00 2001 From: una Date: Wed, 27 Dec 2023 16:12:10 +0800 Subject: [PATCH 037/884] feat:add InitialQuery event Signed-off-by: una --- .../0_stateless/02950_distributed_initial_query_event.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh index c8a955c4fe5..ddd0fb1e408 100644 --- a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags:no-parallel shard +# Tags:no-parallel, shard CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From a6f2eaf5a6ba2a26943d0c1c53c7cf7460a7471d Mon Sep 17 00:00:00 2001 From: una Date: Wed, 27 Dec 2023 16:19:06 +0800 Subject: [PATCH 038/884] fix:use , to split tags Signed-off-by: una --- .../0_stateless/02950_distributed_initial_query_event.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh index ddd0fb1e408..7f690a681c4 100644 --- a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags:no-parallel, shard +# Tags:no-parallel,shard CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From d46d91452176414426e40f598a7a1aa989f1a584 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 27 Dec 2023 10:28:52 +0100 Subject: [PATCH 039/884] Updated thread name --- src/Backups/BackupIO_AzureBlobStorage.cpp | 8 +- src/Backups/BackupIO_AzureBlobStorage.h | 81 +++++++++---------- .../copyAzureBlobStorageFile.cpp | 25 +++--- 3 files changed, 59 insertions(+), 55 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index d41d23e3c36..a1fd5bd8327 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -35,7 +35,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::AzureBlobStorage, "AzureBlobStorage", false, false} + , data_source_description{DataSourceType::AzureBlobStorage, configuration_.container, false, false} , configuration(configuration_) { client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); @@ -160,7 +160,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::AzureBlobStorage, "AzureBlobStorage", false, false} + , data_source_description{DataSourceType::AzureBlobStorage,configuration_.container, false, false} , configuration(configuration_) { client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); @@ -209,7 +209,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu settings, read_settings, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterAzureBlobStorage")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); return; /// copied! 
} } @@ -221,7 +221,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterAzureBlobStorage")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 6ef66fc432d..65affb9f079 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -12,57 +12,54 @@ namespace DB { -// using AzureClientPtr = std::shared_ptr; - /// Represents a backup stored to Azure - class BackupReaderAzureBlobStorage : public BackupReaderDefault - { - public: - BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); - ~BackupReaderAzureBlobStorage() override; +class BackupReaderAzureBlobStorage : public BackupReaderDefault +{ +public: + BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + ~BackupReaderAzureBlobStorage() override; - bool fileExists(const String & file_name) override; - UInt64 getFileSize(const String & file_name) override; - std::unique_ptr readFile(const String & file_name) override; + bool fileExists(const String & file_name) override; + UInt64 getFileSize(const String & file_name) override; + std::unique_ptr readFile(const String & file_name) override; - void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup, - DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override; + void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup, + DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override; - private: - const DataSourceDescription data_source_description; - std::shared_ptr client; - StorageAzureBlob::Configuration configuration; - std::unique_ptr object_storage; - std::shared_ptr settings; - }; +private: + const DataSourceDescription data_source_description; + std::shared_ptr client; + StorageAzureBlob::Configuration configuration; + std::unique_ptr object_storage; + std::shared_ptr settings; +}; +class BackupWriterAzureBlobStorage : public BackupWriterDefault +{ +public: + BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + ~BackupWriterAzureBlobStorage() override; - class BackupWriterAzureBlobStorage : public BackupWriterDefault - { - public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); - ~BackupWriterAzureBlobStorage() override; + bool fileExists(const String & file_name) override; + UInt64 getFileSize(const String & file_name) override; + std::unique_ptr writeFile(const String & file_name) override; - bool 
fileExists(const String & file_name) override; - UInt64 getFileSize(const String & file_name) override; - std::unique_ptr writeFile(const String & file_name) override; + void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override; + void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, + bool copy_encrypted, UInt64 start_pos, UInt64 length) override; - void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override; - void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, - bool copy_encrypted, UInt64 start_pos, UInt64 length) override; + void removeFile(const String & file_name) override; + void removeFiles(const Strings & file_names) override; - void removeFile(const String & file_name) override; - void removeFiles(const Strings & file_names) override; - - private: - std::unique_ptr readFile(const String & file_name, size_t expected_file_size) override; - void removeFilesBatch(const Strings & file_names); - const DataSourceDescription data_source_description; - std::shared_ptr client; - StorageAzureBlob::Configuration configuration; - std::unique_ptr object_storage; - std::shared_ptr settings; - }; +private: + std::unique_ptr readFile(const String & file_name, size_t expected_file_size) override; + void removeFilesBatch(const Strings & file_names); + const DataSourceDescription data_source_description; + std::shared_ptr client; + StorageAzureBlob::Configuration configuration; + std::unique_ptr object_storage; + std::shared_ptr settings; +}; } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index bf0bcac664b..0a0a080b5cb 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -22,6 +22,11 @@ namespace ProfileEvents extern const Event DiskAzureUploadPart; } +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -44,7 +49,8 @@ namespace std::shared_ptr settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunner schedule_, - bool for_disk_azure_blob_storage_) + bool for_disk_azure_blob_storage_, + const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) , client(client_) , offset (offset_) @@ -55,7 +61,7 @@ namespace , object_metadata(object_metadata_) , schedule(schedule_) , for_disk_azure_blob_storage(for_disk_azure_blob_storage_) - , log(&Poco::Logger::get("azureBlobStorageUploadHelper")) + , log(log_) , max_single_part_upload_size(settings_.get()->max_single_part_upload_size) { } @@ -179,11 +185,11 @@ namespace try { auto read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); - auto buffer = std::make_unique(std::move(read_buffer), part_size); task->data = new char[part_size]; task->size = part_size; - buffer->read(task->data,part_size); - task->block_id = getRandomASCIIString(64); + size_t n = read_buffer->read(task->data,part_size); + if (n != part_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size"); schedule([this, task, task_finish_notify]() { @@ -208,9 +214,10 @@ namespace { UploadPartTask task; auto read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); - auto buffer = std::make_unique(std::move(read_buffer), part_size); task.data = new char[part_size]; - 
buffer->read(task.data,part_size); + size_t n = read_buffer->read(task.data,part_size); + if (n != part_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size"); task.size = part_size; processUploadTask(task); block_ids.emplace_back(task.block_id); @@ -274,7 +281,7 @@ void copyDataToAzureBlobStorageFile( ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) { - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; helper.performCopy(); } @@ -314,7 +321,7 @@ void copyAzureBlobStorageFile( settings->max_single_download_retries); }; - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")}; helper.performCopy(); } } From 0181bab23c38c2d1c15f199d522a4743b11586d6 Mon Sep 17 00:00:00 2001 From: una Date: Wed, 27 Dec 2023 19:59:23 +0800 Subject: [PATCH 040/884] fix:style Signed-off-by: una --- src/Interpreters/InterpreterFactory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index fdf7e8ebfbb..c5d7f0f891c 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -138,7 +138,6 @@ namespace ErrorCodes std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMutablePtr context, const SelectQueryOptions & options) { ProfileEvents::increment(ProfileEvents::Query); - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) ProfileEvents::increment(ProfileEvents::InitialQuery); /// SELECT and INSERT query will handle QueriesWithSubqueries on their own. 
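A note on what the InitialQuery patches above establish: `ProfileEvents::InitialQuery` is incremented in `InterpreterFactory::get` only when the client info reports `QueryKind::INITIAL_QUERY`, so secondary queries fanned out by a Distributed table bump `Query` but not `InitialQuery`. Below is a minimal sketch of how the effect can be observed from SQL; the `remote()` addresses and the exact counter deltas are illustrative assumptions, not part of the patches.

```sql
-- Illustrative sketch only: assumes a server built with the InitialQuery event
-- and uses loopback "shards" via the remote() table function.
SELECT event, value FROM system.events WHERE event IN ('Query', 'InitialQuery');

-- One initial query that fans out to two replicas; with prefer_localhost_replica = 0
-- the local replica is also queried over TCP, so Query grows by the secondary queries
-- as well, while InitialQuery grows only for the query issued by the client.
SELECT count() FROM remote('127.0.0.{1,2}', system.one) SETTINGS prefer_localhost_replica = 0;

SELECT event, value FROM system.events WHERE event IN ('Query', 'InitialQuery');
```

The probe SELECTs on system.events are themselves initial queries, which is why the stateless test above subtracts a fixed offset from the measured deltas.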
From 2c1513540768eaed34a13fd643c4ace491421c0e Mon Sep 17 00:00:00 2001
From: una
Date: Wed, 27 Dec 2023 20:53:30 +0800
Subject: [PATCH 041/884] fix test-file permissions

Signed-off-by: una

---
 .../queries/0_stateless/02950_distributed_initial_query_event.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 tests/queries/0_stateless/02950_distributed_initial_query_event.sh

diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh
old mode 100644
new mode 100755
From 32ff152f2d7e4798a7bbc916808cc9ca883cf13e Mon Sep 17 00:00:00 2001
From: flynn
Date: Thu, 28 Dec 2023 11:41:06 +0000
Subject: [PATCH 042/884] Support negative positional arguments

---
 .../replaceForPositionalArguments.cpp         | 24 ++++-
 .../0_stateless/01798_having_push_down.sql    |  3 +-
 .../02006_test_positional_arguments.reference | 94 +++++++++++++++++++
 .../02006_test_positional_arguments.sql       | 21 +++++
 .../02932_group_by_null_fuzzer.sql            |  1 +
 5 files changed, 137 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp
index 241dd7cf92c..bea87ad913a 100644
--- a/src/Interpreters/replaceForPositionalArguments.cpp
+++ b/src/Interpreters/replaceForPositionalArguments.cpp
@@ -27,14 +27,28 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel
         return false;
 
     auto which = ast_literal->value.getType();
-    if (which != Field::Types::UInt64)
+    if (which != Field::Types::UInt64 && which != Field::Types::Int64)
         return false;
 
-    auto pos = ast_literal->value.get();
+    UInt64 pos;
+
+    if (which == Field::Types::UInt64)
+    {
+        pos = ast_literal->value.get();
+    }
+    else if (which == Field::Types::Int64)
+    {
+        auto value = ast_literal->value.get();
+        pos = value > 0 ?
value : columns.size() + value + 1; + } + else + { + return false; + } + if (!pos || pos > columns.size()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Positional argument out of bounds: {} (expected in range [1, {}]", - pos, columns.size()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Positional argument out of bounds: {} (expected in range [1, {}]", pos, columns.size()); const auto & column = columns[--pos]; if (typeid_cast(column.get()) || typeid_cast(column.get())) diff --git a/tests/queries/0_stateless/01798_having_push_down.sql b/tests/queries/0_stateless/01798_having_push_down.sql index b3a77c8f5b5..c0c3447f5ab 100644 --- a/tests/queries/0_stateless/01798_having_push_down.sql +++ b/tests/queries/0_stateless/01798_having_push_down.sql @@ -8,11 +8,12 @@ SELECT sum(c0 = 0), min(c0 + 1), sum(c0 + 2) FROM t_having GROUP BY c0 HAVING c0 = 0 SETTINGS enable_optimize_predicate_expression=0; +SET enable_positional_arguments=0; + SELECT c0 + -1, sum(intDivOrZero(intDivOrZero(NULL, NULL), '2'), intDivOrZero(10000000000., intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), 10), NULL))) FROM t_having GROUP BY c0 = 2, c0 = 10, intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), NULL), NULL), c0 HAVING c0 = 2 SETTINGS enable_optimize_predicate_expression = 0; SELECT sum(c0 + 257) FROM t_having GROUP BY c0 = -9223372036854775808, NULL, -2147483649, c0 HAVING c0 = -9223372036854775808 SETTINGS enable_optimize_predicate_expression = 0; -SET enable_positional_arguments=0; SELECT c0 + -2, c0 + -9223372036854775807, c0 = NULL FROM t_having GROUP BY c0 = 0.9998999834060669, 1023, c0 HAVING c0 = 0.9998999834060669 SETTINGS enable_optimize_predicate_expression = 0; DROP TABLE t_having; diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference index 40100e8d5be..079bd071103 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.reference +++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference @@ -3,18 +3,50 @@ select x3, x2, x1 from test order by 1; 1 100 100 10 1 10 100 10 1 +select x3, x2, x1 from test order by -3; +1 100 100 +10 1 10 +100 10 1 select x3, x2, x1 from test order by x3; 1 100 100 10 1 10 100 10 1 +select x3, x2, x1 from test order by 3; +100 10 1 +10 1 10 +1 100 100 +select x3, x2, x1 from test order by -1; +100 10 1 +10 1 10 +1 100 100 +select x3, x2, x1 from test order by x1; +100 10 1 +10 1 10 +1 100 100 select x3, x2, x1 from test order by 1 desc; 100 10 1 10 1 10 1 100 100 +select x3, x2, x1 from test order by -3 desc; +100 10 1 +10 1 10 +1 100 100 select x3, x2, x1 from test order by x3 desc; 100 10 1 10 1 10 1 100 100 +select x3, x2, x1 from test order by 3 desc; +1 100 100 +10 1 10 +100 10 1 +select x3, x2, x1 from test order by -1 desc; +1 100 100 +10 1 10 +100 10 1 +select x3, x2, x1 from test order by x1 desc; +1 100 100 +10 1 10 +100 10 1 insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1); select x3, x2 from test group by x3, x2 order by x3; 1 100 @@ -54,6 +86,20 @@ SELECT x1 FROM test ORDER BY x3 + 1 ASC +explain syntax select x3, x2, x1 from test order by -1; +SELECT + x3, + x2, + x1 +FROM test +ORDER BY x1 ASC +explain syntax select x3 + 1, x2, x1 from test order by -1; +SELECT + x3 + 1, + x2, + x1 +FROM test +ORDER BY x1 ASC explain syntax select x3, x3 - x2, x2, x1 from test order by 2; SELECT x3, @@ -62,6 +108,14 @@ SELECT x1 FROM test ORDER BY x3 - x2 ASC +explain syntax select x3, x3 - x2, x2, x1 from 
test order by -2; +SELECT + x3, + x3 - x2, + x2, + x1 +FROM test +ORDER BY x2 ASC explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2; SELECT x3, @@ -69,12 +123,28 @@ SELECT x1 + x2 FROM test ORDER BY if(x3 > 10, x3, x1 + x2) ASC +explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2; +SELECT + x3, + if(x3 > 10, x3, x1 + x2), + x1 + x2 +FROM test +ORDER BY if(x3 > 10, x3, x1 + x2) ASC explain syntax select max(x1), x2 from test group by 2 order by 1, 2; SELECT max(x1), x2 FROM test GROUP BY x2 +ORDER BY + max(x1) ASC, + x2 ASC +explain syntax select max(x1), x2 from test group by -1 order by -2, -1; +SELECT + max(x1), + x2 +FROM test +GROUP BY x2 ORDER BY max(x1) ASC, x2 ASC @@ -83,16 +153,34 @@ SELECT 1 + greatest(x1, 1), x2 FROM test +GROUP BY + 1 + greatest(x1, 1), + x2 +explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1; +SELECT + 1 + greatest(x1, 1), + x2 +FROM test GROUP BY 1 + greatest(x1, 1), x2 select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } +select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } +select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } explain syntax select x1 + x3, x3 from test group by 1, 2; SELECT x1 + x3, x3 FROM test +GROUP BY + x1 + x3, + x3 +explain syntax select x1 + x3, x3 from test group by -2, -1; +SELECT + x1 + x3, + x3 +FROM test GROUP BY x1 + x3, x3 @@ -102,8 +190,14 @@ select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, 1 2 10 100 10 20 1 10 100 200 100 1 +select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc; +1 2 10 100 +10 20 1 10 +100 200 100 1 select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a; 44 88 13 14 15 16 +select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a; +44 88 13 14 15 16 explain syntax select plus(1, 1) as a group by a; SELECT 1 + 1 AS a GROUP BY a diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql index 159ad6bd427..6f427e0298d 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql @@ -9,11 +9,21 @@ insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1); -- { echo } select x3, x2, x1 from test order by 1; +select x3, x2, x1 from test order by -3; select x3, x2, x1 from test order by x3; +select x3, x2, x1 from test order by 3; +select x3, x2, x1 from test order by -1; +select x3, x2, x1 from test order by x1; + select x3, x2, x1 from test order by 1 desc; +select x3, x2, x1 from test order by -3 desc; select x3, x2, x1 from test order by x3 desc; +select x3, x2, x1 from test order by 3 desc; +select x3, x2, x1 from test order by -1 desc; +select x3, x2, x1 from test order by x1 desc; + insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1); select x3, x2 from test group by x3, x2 order by x3; select x3, x2 from test group by 1, 2 order by x3; @@ -25,21 +35,32 @@ select x1, x2, x3 from test order by 3 limit 1 by 1; explain syntax select x3, x2, x1 from test order by 1; explain syntax select x3 + 1, x2, x1 from test order by 1; +explain syntax select x3, x2, x1 from test order by -1; +explain syntax select x3 + 1, x2, x1 from test order by 
-1; explain syntax select x3, x3 - x2, x2, x1 from test order by 2; +explain syntax select x3, x3 - x2, x2, x1 from test order by -2; explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2; +explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2; explain syntax select max(x1), x2 from test group by 2 order by 1, 2; +explain syntax select max(x1), x2 from test group by -1 order by -2, -1; explain syntax select 1 + greatest(x1, 1), x2 from test group by 1, 2; +explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1; select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } +select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } +select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } explain syntax select x1 + x3, x3 from test group by 1, 2; +explain syntax select x1 + x3, x3 from test group by -2, -1; create table test2(x1 Int, x2 Int, x3 Int) engine=Memory; insert into test2 values (1, 10, 100), (10, 1, 10), (100, 100, 1); select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, 4 desc, 3 asc; +select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc; select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a; +select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a; explain syntax select plus(1, 1) as a group by a; select substr('aaaaaaaaaaaaaa', 8) as a group by a order by a; diff --git a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql index 0c28c120d40..603c7783ef8 100644 --- a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql +++ b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql @@ -1,5 +1,6 @@ -- https://github.com/ClickHouse/ClickHouse/issues/43202 -- Queries are generated by the fuzzer, so don't expect them to make sense +SET enable_positional_arguments=0; SELECT NULL, '' FROM (SELECT toNullable(''), NULL AS key GROUP BY GROUPING SETS ((NULL))) AS s1 ALL LEFT JOIN (SELECT '' AS key, NULL AS value GROUP BY GROUPING SETS (('')) WITH TOTALS UNION ALL SELECT NULL AS key, toNullable(NULL) AS value GROUP BY '', NULL, '' WITH TOTALS) AS s2 USING (key); SELECT NULL GROUP BY NULL WITH TOTALS; SELECT 1048575, NULL, b FROM (SELECT '25.5' AS a, NULL, NULL AS b GROUP BY GROUPING SETS ((0.0001)) WITH TOTALS) AS js1 ANY RIGHT JOIN (SELECT NULL AS a, NULL AS b WHERE NULL GROUP BY NULL, -9223372036854775807 WITH CUBE WITH TOTALS UNION ALL SELECT NULL AS a, NULL AS b GROUP BY 1, '21474836.46' WITH TOTALS) AS js2 USING (a, b) ORDER BY nan DESC NULLS LAST, '9223372036854775807' DESC NULLS LAST, a ASC NULLS LAST; From e2f4219c12c216ab32a267b153969b758126a077 Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 28 Dec 2023 12:22:30 +0000 Subject: [PATCH 043/884] Fix --- src/Interpreters/TreeOptimizer.cpp | 7 +++---- .../02943_positional_arguments_bugs.reference | 11 ++++++++++- .../0_stateless/02943_positional_arguments_bugs.sql | 13 +++++++------ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 729e2ed6007..57dba3eef89 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -76,11 +76,10 @@ const std::unordered_set 
possibly_injective_function_names */ void appendUnusedGroupByColumn(ASTSelectQuery * select_query) { - /// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens. - /// Also start unused_column integer must not intersect with ([1, source_columns.size()]) - /// might be in positional GROUP BY. + /// Since ASTLiteral is different from ASTIdentifier, so we can use a special constant String Literal for this, + /// and do not need to worry about it conflict with the name of the column in the table. select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared()); - select_query->groupBy()->children.emplace_back(std::make_shared(static_cast(-1))); + select_query->groupBy()->children.emplace_back(std::make_shared("__unused_group_by_column")); } /// Eliminates injective function calls and constant expressions from group by statement. diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.reference b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference index 702e1261186..47e8df9e382 100644 --- a/tests/queries/0_stateless/02943_positional_arguments_bugs.reference +++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference @@ -1,2 +1,11 @@ -45 1 +0 0 +4 4 +3 3 +2 2 +5 5 +1 1 +6 6 +7 7 +9 9 +8 8 processed 99 0 diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.sql b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql index b8cf73da42d..8cc3fb4b17d 100644 --- a/tests/queries/0_stateless/02943_positional_arguments_bugs.sql +++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql @@ -2,18 +2,19 @@ DROP TABLE IF EXISTS t; CREATE TABLE t ( - `n` int + `n` int, + `__unused_group_by_column` int ) - ENGINE = MergeTree - ORDER BY n AS -SELECT * +ENGINE = MergeTree +ORDER BY n AS +SELECT number, number FROM numbers(10); SELECT sum(n), - 1 AS x + __unused_group_by_column FROM t -GROUP BY x; +GROUP BY __unused_group_by_column; SELECT 'processed' AS type, From 2e9cdd17ef136f064042b541dbc68ef64ba8194f Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 28 Dec 2023 14:08:14 +0000 Subject: [PATCH 044/884] Fix flaky test --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 26 ++++++++++++++----- .../02943_positional_arguments_bugs.reference | 11 ++++---- .../02943_positional_arguments_bugs.sql | 6 +++-- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 3290d918a8b..9ec6d9e358c 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2156,19 +2156,31 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ node_to_replace = &sort_node->getExpression(); auto * constant_node = (*node_to_replace)->as(); - if (!constant_node || constant_node->getValue().getType() != Field::Types::UInt64) + + if (!constant_node + || (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64)) continue; - UInt64 positional_argument_number = constant_node->getValue().get(); - if (positional_argument_number == 0 || positional_argument_number > projection_nodes.size()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, + UInt64 pos; + if (constant_node->getValue().getType() == Field::Types::UInt64) + { + pos = constant_node->getValue().get(); + } + else // Int64 + { + auto value = constant_node->getValue().get(); + pos = value > 0 ? 
value : projection_nodes.size() + value + 1; + } + + if (!pos || pos > projection_nodes.size()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}", - positional_argument_number, + pos, projection_nodes.size(), scope.scope_node->formatASTForErrorMessage()); - --positional_argument_number; - *node_to_replace = projection_nodes[positional_argument_number]; + *node_to_replace = projection_nodes[--pos]; } } diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.reference b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference index 47e8df9e382..08310b7cf27 100644 --- a/tests/queries/0_stateless/02943_positional_arguments_bugs.reference +++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference @@ -1,11 +1,12 @@ 0 0 -4 4 -3 3 -2 2 -5 5 1 1 +2 2 +3 3 +4 4 +5 5 6 6 7 7 -9 9 8 8 +9 9 +45 1 processed 99 0 diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.sql b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql index 8cc3fb4b17d..9b1b872ae40 100644 --- a/tests/queries/0_stateless/02943_positional_arguments_bugs.sql +++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS t; CREATE TABLE t ( `n` int, - `__unused_group_by_column` int + `__unused_group_by_column` int ) ENGINE = MergeTree ORDER BY n AS @@ -14,7 +14,9 @@ SELECT sum(n), __unused_group_by_column FROM t -GROUP BY __unused_group_by_column; +GROUP BY __unused_group_by_column ORDER BY __unused_group_by_column; + +SELECT sum(n), 1 as x from t group by x; SELECT 'processed' AS type, From b70ff6d8ea71d4633cdcdbe3ef486707e70c1abb Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 1 Jan 2024 11:02:57 +0100 Subject: [PATCH 045/884] Fix build --- src/Backups/BackupIO_AzureBlobStorage.cpp | 33 +++++++++++++++++++++-- src/Backups/BackupIO_AzureBlobStorage.h | 2 ++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index a1fd5bd8327..bd4efcf63ae 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -218,10 +218,39 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length); } +void BackupWriterAzureBlobStorage::copyFile(const String & destination, const String & source, size_t size) +{ + std::shared_ptr src_client; + std::shared_ptr dest_client; + StorageAzureBlob::Configuration src_configuration = configuration; + src_configuration.container = source; + src_client = StorageAzureBlob::createClient(src_configuration, /* is_read_only */ false); + + StorageAzureBlob::Configuration dest_configuration = configuration; + dest_configuration.container = destination; + dest_client = StorageAzureBlob::createClient(dest_configuration, /* is_read_only */ false); + + LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination); + copyAzureBlobStorageFile( + src_client, + dest_client, + configuration.container, + fs::path(configuration.blob_path), + 0, + size, + /* dest_bucket= */ destination, + /* dest_key= */ configuration.blob_path, + settings, + read_settings, + {}, + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupRDAzure"), + /* for_disk_azure_blob_storage= */ true); +} + void BackupWriterAzureBlobStorage::copyDataToFile(const String & 
path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; @@ -257,7 +286,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) RelativePathsWithMetadata children; object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object {} must exist"); + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist"); return children[0].metadata.size_bytes; } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 65affb9f079..87a6c3ef675 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -48,6 +48,8 @@ public: void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, bool copy_encrypted, UInt64 start_pos, UInt64 length) override; + void copyFile(const String & destination, const String & source, size_t size) override; + void removeFile(const String & file_name) override; void removeFiles(const Strings & file_names) override; From 4122de97213d835de5202d4ca741b4972973884b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 2 Jan 2024 20:19:01 +0100 Subject: [PATCH 046/884] Updated tests and added settings --- src/Backups/BackupIO_AzureBlobStorage.cpp | 6 +- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 5 +- .../AzureBlobStorage/AzureObjectStorage.h | 11 ++- .../copyAzureBlobStorageFile.cpp | 68 +++++++++++++++++-- src/Storages/StorageAzureBlob.cpp | 2 +- .../configs/config.xml | 11 --- .../configs/disable_profilers.xml | 13 ---- .../configs/users.xml | 8 --- .../test.py | 2 - 9 files changed, 80 insertions(+), 46 deletions(-) delete mode 100644 tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml delete mode 100644 tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml delete mode 100644 tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index bd4efcf63ae..15e8e92a85d 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -139,7 +139,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, settings, read_settings, object_attributes, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupReaderAzureBlobStorage"), + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupRDAzure"), /* for_disk_azure_blob_storage= */ true); return file_size; @@ -209,7 +209,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu settings, read_settings, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); return; /// copied! 
} } @@ -243,7 +243,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St settings, read_settings, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupRDAzure"), + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure"), /* for_disk_azure_blob_storage= */ true); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index 6075b385a6c..9e703d6fc5e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -164,7 +164,10 @@ std::unique_ptr getAzureBlobStorageSettings(const Po config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".max_single_read_retries", 3), config.getInt(config_prefix + ".max_single_download_retries", 3), - config.getInt(config_prefix + ".list_object_keys_size", 1000) + config.getInt(config_prefix + ".list_object_keys_size", 1000), + config.getUInt64(config_prefix + ".min_upload_part_size", 16 * 1024 * 1024), + config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024), + config.getUInt64(config_prefix + ".max_part_number", 10000) ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 8e3d50418d3..55c81b4b7d9 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -24,12 +24,18 @@ struct AzureObjectStorageSettings uint64_t min_bytes_for_seek_, int max_single_read_retries_, int max_single_download_retries_, - int list_object_keys_size_) + int list_object_keys_size_, + size_t min_upload_part_size_, + size_t max_upload_part_size_, + size_t max_part_number_) : max_single_part_upload_size(max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) , max_single_read_retries(max_single_read_retries_) , max_single_download_retries(max_single_download_retries_) , list_object_keys_size(list_object_keys_size_) + , min_upload_part_size(min_upload_part_size_) + , max_upload_part_size(max_upload_part_size_) + , max_part_number(max_part_number_) { } @@ -40,6 +46,9 @@ struct AzureObjectStorageSettings size_t max_single_read_retries = 3; size_t max_single_download_retries = 3; int list_object_keys_size = 1000; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t max_part_number = 10000; }; using AzureClient = Azure::Storage::Blobs::BlobContainerClient; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 0a0a080b5cb..5ca30fa8071 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -22,15 +22,17 @@ namespace ProfileEvents extern const Event DiskAzureUploadPart; } -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INVALID_CONFIG_PARAMETER; +} + + size_t max_single_operation_copy_size = 256 * 1024 * 1024; @@ -106,6 +108,60 @@ namespace std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; + void calculatePartSize() + { + if (!total_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. 
This must not happen"); + + auto max_part_number = settings.get()->max_part_number; + auto min_upload_part_size = settings.get()->min_upload_part_size; + auto max_upload_part_size = settings.get()->max_upload_part_size; + + if (!max_part_number) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); + else if (!min_upload_part_size) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "min_upload_part_size must not be 0"); + else if (max_upload_part_size < min_upload_part_size) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be less than min_upload_part_size"); + + size_t part_size = min_upload_part_size; + size_t num_parts = (total_size + part_size - 1) / part_size; + + if (num_parts > max_part_number) + { + part_size = (total_size + max_part_number - 1) / max_part_number; + num_parts = (total_size + part_size - 1) / part_size; + } + + if (part_size > max_upload_part_size) + { + part_size = max_upload_part_size; + num_parts = (total_size + part_size - 1) / part_size; + } + + if (num_parts < 1 || num_parts > max_part_number || part_size < min_upload_part_size || part_size > max_upload_part_size) + { + String msg; + if (num_parts < 1) + msg = "Number of parts is zero"; + else if (num_parts > max_part_number) + msg = fmt::format("Number of parts exceeds {}", num_parts, max_part_number); + else if (part_size < min_upload_part_size) + msg = fmt::format("Size of a part is less than {}", part_size, min_upload_part_size); + else + msg = fmt::format("Size of a part exceeds {}", part_size, max_upload_part_size); + + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, + "{} while writing {} bytes to AzureBlobStorage. Check max_part_number = {}, " + "min_upload_part_size = {}, max_upload_part_size = {}", + msg, total_size, max_part_number, min_upload_part_size, max_upload_part_size); + } + + /// We've calculated the size of a normal part (the final part can be smaller). + normal_part_size = part_size; + } + public: void performCopy() { @@ -120,7 +176,7 @@ namespace void performMultipartUpload() { - normal_part_size = 1024; + calculatePartSize(); size_t position = offset; size_t end_position = offset + total_size; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 1b28a2c2fac..f1070c8c31e 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1139,7 +1139,7 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() QueryPipelineBuilder builder; std::shared_ptr source; std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files + std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? 
tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; if (num_rows_from_cache) { diff --git a/tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml b/tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml deleted file mode 100644 index 5725dce40cd..00000000000 --- a/tests/integration/test_backup_restore_azure_blob_storage/configs/config.xml +++ /dev/null @@ -1,11 +0,0 @@ - - 1 - 0 - 0.0 - 0 - 1 - 1 - 0 - 16 - 16 - \ No newline at end of file diff --git a/tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml b/tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml deleted file mode 100644 index b74bb1502ce..00000000000 --- a/tests/integration/test_backup_restore_azure_blob_storage/configs/disable_profilers.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - 0 - 0 - 0 - 1000 - 1 - 1 - - - diff --git a/tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml b/tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml deleted file mode 100644 index c12eb2f79f4..00000000000 --- a/tests/integration/test_backup_restore_azure_blob_storage/configs/users.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - default - - - diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 0a48d3523f0..06c18d7468f 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -24,8 +24,6 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/config.xml"], - user_configs=["configs/disable_profilers.xml", "configs/users.xml"], with_azurite=True, ) cluster.start() From df221f7db65fd17af6a71704f756e47ceec7a928 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 3 Jan 2024 11:35:06 +0100 Subject: [PATCH 047/884] Renamed Bucket-Key to Container-Blob --- src/Backups/BackupIO_AzureBlobStorage.cpp | 14 +++--- .../copyAzureBlobStorageFile.cpp | 44 +++++++++---------- .../copyAzureBlobStorageFile.h | 10 ++--- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 15e8e92a85d..de40fc6b33b 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -134,8 +134,8 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, fs::path(configuration.blob_path) / path_in_backup, 0, file_size, - /* dest_bucket= */ blob_path[1], - /* dest_key= */ blob_path[0], + /* dest_container */ blob_path[1], + /* dest_path */ blob_path[0], settings, read_settings, object_attributes, @@ -178,7 +178,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu auto source_data_source_description = src_disk->getDataSourceDescription(); if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) { - /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage bucket. + /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container. /// In this case we can't use the native copy. 
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) { @@ -200,8 +200,8 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu copyAzureBlobStorageFile( src_client, client, - /* src_bucket */ blob_path[1], - /* src_key= */ blob_path[0], + /* src_container */ blob_path[1], + /* src_path */ blob_path[0], start_pos, length, configuration.container, @@ -238,8 +238,8 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St fs::path(configuration.blob_path), 0, size, - /* dest_bucket= */ destination, - /* dest_key= */ configuration.blob_path, + /* dest_container */ destination, + /* dest_path */ configuration.blob_path, settings, read_settings, {}, diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 5ca30fa8071..df1341efdd1 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -46,8 +46,8 @@ namespace std::shared_ptr client_, size_t offset_, size_t total_size_, - const String & dest_bucket_, - const String & dest_key_, + const String & dest_container_, + const String & dest_blob_, std::shared_ptr settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunner schedule_, @@ -57,8 +57,8 @@ namespace , client(client_) , offset (offset_) , total_size (total_size_) - , dest_bucket(dest_bucket_) - , dest_key(dest_key_) + , dest_container(dest_container_) + , dest_blob(dest_blob_) , settings(settings_) , object_metadata(object_metadata_) , schedule(schedule_) @@ -75,8 +75,8 @@ namespace std::shared_ptr client; size_t offset; size_t total_size; - const String & dest_bucket; - const String & dest_key; + const String & dest_container; + const String & dest_blob; std::shared_ptr settings; const std::optional> & object_metadata; ThreadPoolCallbackRunner schedule; @@ -170,7 +170,7 @@ namespace void completeMultipartUpload() { - auto block_blob_client = client->GetBlockBlobClient(dest_key); + auto block_blob_client = client->GetBlockBlobClient(dest_blob); block_blob_client.CommitBlockList(block_ids); } @@ -207,7 +207,7 @@ namespace void uploadPart(size_t part_offset, size_t part_size) { - LOG_TRACE(log, "Writing part. Bucket: {}, Key: {}, Size: {}", dest_bucket, dest_key, part_size); + LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, Size: {}", dest_container, dest_blob, part_size); if (!part_size) { @@ -286,7 +286,7 @@ namespace std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race task.block_id = block_id; - LOG_TRACE(log, "Writing part finished. Bucket: {}, Key: {}, block_id: {}, Parts: {}", dest_bucket, dest_key, block_id, bg_tasks.size()); + LOG_TRACE(log, "Writing part finished. 
Container: {}, Blob: {}, block_id: {}, Parts: {}", dest_container, dest_blob, block_id, bg_tasks.size()); } String processUploadPartRequest(UploadPartTask & task) @@ -295,7 +295,7 @@ namespace if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); - auto block_blob_client = client->GetBlockBlobClient(dest_key); + auto block_blob_client = client->GetBlockBlobClient(dest_blob); task.block_id = getRandomASCIIString(64); Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast(task.data), task.size); block_blob_client.StageBlock(task.block_id, memory); @@ -330,14 +330,14 @@ void copyDataToAzureBlobStorageFile( size_t offset, size_t size, std::shared_ptr & dest_client, - const String & dest_bucket, - const String & dest_key, + const String & dest_container, + const String & dest_blob, std::shared_ptr settings, const std::optional> & object_metadata, ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) { - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container, dest_blob, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; helper.performCopy(); } @@ -345,12 +345,12 @@ void copyDataToAzureBlobStorageFile( void copyAzureBlobStorageFile( std::shared_ptr src_client, std::shared_ptr dest_client, - const String & src_bucket, - const String & src_key, + const String & src_container, + const String & src_blob, size_t offset, size_t size, - const String & dest_bucket, - const String & dest_key, + const String & dest_container, + const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, const std::optional> & object_metadata, @@ -363,21 +363,21 @@ void copyAzureBlobStorageFile( ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); - auto block_blob_client_src = src_client->GetBlockBlobClient(src_key); - auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_key); + auto block_blob_client_src = src_client->GetBlockBlobClient(src_blob); + auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob); auto uri = block_blob_client_src.GetUrl(); block_blob_client_dest.CopyFromUri(uri); } else { - LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Bucket: {}, Key: {}", src_bucket, src_key); + LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container, src_blob); auto create_read_buffer = [&] { - return std::make_unique(src_client, src_key, read_settings, settings->max_single_read_retries, + return std::make_unique(src_client, src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); }; - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container, dest_blob, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")}; helper.performCopy(); } } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h 
b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 31228fbcb23..059d0318f57 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -23,12 +23,12 @@ using CreateReadBuffer = std::function()>; void copyAzureBlobStorageFile( std::shared_ptr src_client, std::shared_ptr dest_client, - const String & src_bucket, - const String & src_key, + const String & src_container, + const String & src_path, size_t src_offset, size_t src_size, - const String & dest_bucket, - const String & dest_key, + const String & dest_container, + const String & dest_path, std::shared_ptr settings, const ReadSettings & read_settings, const std::optional> & object_metadata = std::nullopt, @@ -46,8 +46,8 @@ void copyDataToAzureBlobStorageFile( size_t offset, size_t size, std::shared_ptr & client, + const String & dest_container, const String & dest_bucket, - const String & dest_key, std::shared_ptr settings, const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunner schedule_ = {}, From 91bad5bc39963e9450f284dfc6b45fd69fa146de Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 4 Jan 2024 16:06:36 +0100 Subject: [PATCH 048/884] Updated to use MultiVersion for BlobContainerClient in Backups and updated to get client from disk --- src/Backups/BackupIO_AzureBlobStorage.cpp | 72 +++++-------------- src/Backups/BackupIO_AzureBlobStorage.h | 4 +- .../AzureBlobStorage/AzureObjectStorage.h | 5 ++ .../copyAzureBlobStorageFile.cpp | 20 +++--- .../copyAzureBlobStorageFile.h | 6 +- 5 files changed, 37 insertions(+), 70 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index de40fc6b33b..968a60c566f 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -27,8 +27,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -//using AzureClientPtr = std::shared_ptr; - BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, @@ -38,12 +36,13 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , data_source_description{DataSourceType::AzureBlobStorage, configuration_.container, false, false} , configuration(configuration_) { - client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); settings = StorageAzureBlob::createSettingsAsSharedPtr(context_); auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); object_storage = std::make_unique("BackupReaderAzureBlobStorage", - std::make_unique(*client.get()), + std::move(client_ptr), std::move(settings_as_unique_ptr)); + client = object_storage->getClient(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; @@ -89,7 +88,7 @@ std::unique_ptr BackupReaderAzureBlobStorage::readFile(const key = file_name; } return std::make_unique( - client, key, read_settings, settings->max_single_read_retries, + client.get(), key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); } @@ -113,23 +112,9 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, "Blob writing function called with unexpected blob_path.size={} or mode={}", blob_path.size(), mode); - std::shared_ptr dest_client; - if (configuration.container == blob_path[1]) - { - dest_client = client; - } - else - { - 
StorageAzureBlob::Configuration dest_configuration = configuration; - dest_configuration.container = blob_path[1]; - dest_configuration.blob_path = blob_path[0]; - dest_client = StorageAzureBlob::createClient(dest_configuration, /* is_read_only */ false); - } - - copyAzureBlobStorageFile( client, - dest_client, + reinterpret_cast(destination_disk->getObjectStorage().get())->getClient(), configuration.container, fs::path(configuration.blob_path) / path_in_backup, 0, @@ -163,12 +148,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( , data_source_description{DataSourceType::AzureBlobStorage,configuration_.container, false, false} , configuration(configuration_) { - client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); settings = StorageAzureBlob::createSettingsAsSharedPtr(context_); auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); object_storage = std::make_unique("BackupWriterAzureBlobStorage", - std::make_unique(*client.get()), - std::move(settings_as_unique_ptr)); + std::move(client_ptr), + std::move(settings_as_unique_ptr)); + client = object_storage->getClient(); } void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, @@ -182,23 +168,9 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu /// In this case we can't use the native copy. if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) { - - std::shared_ptr src_client; - if (configuration.container == blob_path[1]) - { - src_client = client; - } - else - { - StorageAzureBlob::Configuration src_configuration = configuration; - src_configuration.container = blob_path[1]; - src_configuration.blob_path = blob_path[0]; - src_client = StorageAzureBlob::createClient(src_configuration, /* is_read_only */ false); - } - LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); copyAzureBlobStorageFile( - src_client, + reinterpret_cast(src_disk->getObjectStorage().get())->getClient(), client, /* src_container */ blob_path[1], /* src_path */ blob_path[0], @@ -220,26 +192,16 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu void BackupWriterAzureBlobStorage::copyFile(const String & destination, const String & source, size_t size) { - std::shared_ptr src_client; - std::shared_ptr dest_client; - StorageAzureBlob::Configuration src_configuration = configuration; - src_configuration.container = source; - src_client = StorageAzureBlob::createClient(src_configuration, /* is_read_only */ false); - - StorageAzureBlob::Configuration dest_configuration = configuration; - dest_configuration.container = destination; - dest_client = StorageAzureBlob::createClient(dest_configuration, /* is_read_only */ false); - LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination); copyAzureBlobStorageFile( - src_client, - dest_client, + client, + client, configuration.container, - fs::path(configuration.blob_path), + fs::path(source), 0, size, - /* dest_container */ destination, - /* dest_path */ configuration.blob_path, + /* dest_container */ configuration.container, + /* dest_path */ destination, settings, read_settings, {}, @@ -303,7 +265,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String } return std::make_unique( - client, key, read_settings, 
settings->max_single_read_retries, + client.get(), key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); } @@ -319,7 +281,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin key = file_name; } return std::make_unique( - client, + client.get(), key, settings->max_single_part_upload_size, DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 87a6c3ef675..12bf073cd08 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -28,7 +28,7 @@ public: private: const DataSourceDescription data_source_description; - std::shared_ptr client; + MultiVersion client; StorageAzureBlob::Configuration configuration; std::unique_ptr object_storage; std::shared_ptr settings; @@ -57,7 +57,7 @@ private: std::unique_ptr readFile(const String & file_name, size_t expected_file_size) override; void removeFilesBatch(const Strings & file_names); const DataSourceDescription data_source_description; - std::shared_ptr client; + MultiVersion client; StorageAzureBlob::Configuration configuration; std::unique_ptr object_storage; std::shared_ptr settings; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 55c81b4b7d9..1ff4537742f 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -134,6 +134,11 @@ public: bool isRemote() const override { return true; } + MultiVersion & getClient() + { + return client; + } + private: const String name; /// client used to access the files in the Blob Storage cloud diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index df1341efdd1..4ec90d2830e 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -43,7 +43,7 @@ namespace public: UploadHelper( const CreateReadBuffer & create_read_buffer_, - std::shared_ptr client_, + MultiVersion & client_, size_t offset_, size_t total_size_, const String & dest_container_, @@ -72,7 +72,7 @@ namespace protected: std::function()> create_read_buffer; - std::shared_ptr client; + MultiVersion & client; size_t offset; size_t total_size; const String & dest_container; @@ -170,7 +170,7 @@ namespace void completeMultipartUpload() { - auto block_blob_client = client->GetBlockBlobClient(dest_blob); + auto block_blob_client = client.get()->GetBlockBlobClient(dest_blob); block_blob_client.CommitBlockList(block_ids); } @@ -295,7 +295,7 @@ namespace if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); - auto block_blob_client = client->GetBlockBlobClient(dest_blob); + auto block_blob_client = client.get()->GetBlockBlobClient(dest_blob); task.block_id = getRandomASCIIString(64); Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast(task.data), task.size); block_blob_client.StageBlock(task.block_id, memory); @@ -329,7 +329,7 @@ void copyDataToAzureBlobStorageFile( const std::function()> & create_read_buffer, size_t offset, size_t size, - std::shared_ptr & dest_client, + MultiVersion & dest_client, const String & dest_container, const String & dest_blob, std::shared_ptr settings, @@ -343,8 +343,8 @@ void copyDataToAzureBlobStorageFile( void copyAzureBlobStorageFile( - std::shared_ptr src_client, - std::shared_ptr 
dest_client, + MultiVersion & src_client, + MultiVersion & dest_client, const String & src_container, const String & src_blob, size_t offset, @@ -363,8 +363,8 @@ void copyAzureBlobStorageFile( ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); - auto block_blob_client_src = src_client->GetBlockBlobClient(src_blob); - auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob); + auto block_blob_client_src = src_client.get()->GetBlockBlobClient(src_blob); + auto block_blob_client_dest = dest_client.get()->GetBlockBlobClient(dest_blob); auto uri = block_blob_client_src.GetUrl(); block_blob_client_dest.CopyFromUri(uri); } @@ -373,7 +373,7 @@ void copyAzureBlobStorageFile( LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container, src_blob); auto create_read_buffer = [&] { - return std::make_unique(src_client, src_blob, read_settings, settings->max_single_read_retries, + return std::make_unique(src_client.get(), src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); }; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 059d0318f57..a6502541db1 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -21,8 +21,8 @@ using CreateReadBuffer = std::function()>; /// Copies a file from AzureBlobStorage to AzureBlobStorage. /// The parameters `src_offset` and `src_size` specify a part in the source to copy. void copyAzureBlobStorageFile( - std::shared_ptr src_client, - std::shared_ptr dest_client, + MultiVersion & src_client, + MultiVersion & dest_client, const String & src_container, const String & src_path, size_t src_offset, @@ -45,7 +45,7 @@ void copyDataToAzureBlobStorageFile( const std::function()> & create_read_buffer, size_t offset, size_t size, - std::shared_ptr & client, + MultiVersion & client, const String & dest_container, const String & dest_bucket, std::shared_ptr settings, From c14605caa7f403531a6ff0663c242aa5d466ab07 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 4 Jan 2024 18:27:54 +0100 Subject: [PATCH 049/884] Added flag use_native_copy and updated to use StartCopyFromUri for native copy with large files --- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 3 ++- .../AzureBlobStorage/AzureObjectStorage.h | 10 +++---- .../copyAzureBlobStorageFile.cpp | 26 ++++++++++++++++--- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index 9e703d6fc5e..e29def06363 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -167,7 +167,8 @@ std::unique_ptr getAzureBlobStorageSettings(const Po config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getUInt64(config_prefix + ".min_upload_part_size", 16 * 1024 * 1024), config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024), - config.getUInt64(config_prefix + ".max_part_number", 10000) + config.getUInt64(config_prefix + ".max_part_number", 10000), + config.getBool(config_prefix + ".use_native_copy", false) ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h 
b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 1ff4537742f..436b48c0ad4 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -27,7 +27,8 @@ struct AzureObjectStorageSettings int list_object_keys_size_, size_t min_upload_part_size_, size_t max_upload_part_size_, - size_t max_part_number_) + size_t max_part_number_, + bool use_native_copy_) : max_single_part_upload_size(max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) , max_single_read_retries(max_single_read_retries_) @@ -36,6 +37,7 @@ struct AzureObjectStorageSettings , min_upload_part_size(min_upload_part_size_) , max_upload_part_size(max_upload_part_size_) , max_part_number(max_part_number_) + , use_native_copy(use_native_copy_) { } @@ -49,6 +51,7 @@ struct AzureObjectStorageSettings size_t min_upload_part_size = 16 * 1024 * 1024; size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; size_t max_part_number = 10000; + bool use_native_copy = false; }; using AzureClient = Azure::Storage::Blobs::BlobContainerClient; @@ -134,10 +137,7 @@ public: bool isRemote() const override { return true; } - MultiVersion & getClient() - { - return client; - } + MultiVersion & getClient() { return client; } private: const String name; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 4ec90d2830e..9db5ddb476a 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int INVALID_CONFIG_PARAMETER; + extern const int AZURE_BLOB_STORAGE_ERROR; } @@ -358,15 +359,34 @@ void copyAzureBlobStorageFile( bool for_disk_azure_blob_storage) { - if (size < max_single_operation_copy_size) + if (settings->use_native_copy ) { ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); + auto block_blob_client_src = src_client.get()->GetBlockBlobClient(src_blob); auto block_blob_client_dest = dest_client.get()->GetBlockBlobClient(dest_blob); - auto uri = block_blob_client_src.GetUrl(); - block_blob_client_dest.CopyFromUri(uri); + auto source_uri = block_blob_client_src.GetUrl(); + + if (size < max_single_operation_copy_size) + { + block_blob_client_dest.CopyFromUri(source_uri); + } + else + { + Azure::Storage::Blobs::StartBlobCopyOperation operation = block_blob_client_dest.StartCopyFromUri(source_uri); + + // Wait for the operation to finish, checking for status every 100 second. 
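+            // Note: the interval passed to PollUntilDone() below is the polling period, so the copy
+            // status is re-checked every 100 milliseconds until the server-side copy reaches a final state.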
+ auto copy_response = operation.PollUntilDone(std::chrono::milliseconds(100)); + auto properties_model = copy_response.Value; + + if (properties_model.CopySource.HasValue()) + { + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy failed"); + } + + } } else { From 2ee68933123583fe585093868e65c3562d36d66a Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 5 Jan 2024 10:58:04 +0100 Subject: [PATCH 050/884] Updated to return container for getObjectsNamespace --- src/Backups/BackupIO_AzureBlobStorage.cpp | 6 ++++-- .../ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp | 7 +++++-- .../ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 6 ++++-- .../AzureBlobStorage/registerDiskAzureBlobStorage.cpp | 4 +++- src/Storages/StorageAzureBlob.cpp | 2 +- src/TableFunctions/TableFunctionAzureBlobStorage.cpp | 4 ++-- .../TableFunctionAzureBlobStorageCluster.cpp | 4 ++-- 7 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 968a60c566f..5ddbb42e2c0 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -41,7 +41,8 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), - std::move(settings_as_unique_ptr)); + std::move(settings_as_unique_ptr), + configuration_.container); client = object_storage->getClient(); } @@ -153,7 +154,8 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), - std::move(settings_as_unique_ptr)); + std::move(settings_as_unique_ptr), + configuration_.container); client = object_storage->getClient(); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 068e2aebab1..1f92ef48350 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -92,10 +92,12 @@ private: AzureObjectStorage::AzureObjectStorage( const String & name_, AzureClientPtr && client_, - SettingsPtr && settings_) + SettingsPtr && settings_, + const String & container_) : name(name_) , client(std::move(client_)) , settings(std::move(settings_)) + , container(container_) , log(&Poco::Logger::get("AzureObjectStorage")) { data_source_description.type = DataSourceType::AzureBlobStorage; @@ -379,7 +381,8 @@ std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std return std::make_unique( name, getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context) + getAzureBlobStorageSettings(config, config_prefix, context), + container ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 436b48c0ad4..660d4a30889 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -66,7 +66,8 @@ public: AzureObjectStorage( const String & name_, AzureClientPtr && client_, - SettingsPtr && settings_); + SettingsPtr && settings_, + const String & container_); void listObjects(const std::string & path, 
RelativePathsWithMetadata & children, int max_keys) const override; @@ -125,7 +126,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - String getObjectsNamespace() const override { return ""; } + String getObjectsNamespace() const override { return container ; } std::unique_ptr cloneObjectStorage( const std::string & new_namespace, @@ -144,6 +145,7 @@ private: /// client used to access the files in the Blob Storage cloud MultiVersion client; MultiVersion settings; + const String container; Poco::Logger * log; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 7ba9d21db62..2ffd910f92a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -26,10 +26,12 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access { auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); + String container_name = config.getString(config_prefix + ".container_name", "default-container"); ObjectStoragePtr azure_object_storage = std::make_unique( name, getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context)); + getAzureBlobStorageSettings(config, config_prefix, context), + container_name); String key_prefix; auto metadata_storage = std::make_shared(metadata_disk, key_prefix); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index f1070c8c31e..fcd7074b9d2 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -314,7 +314,7 @@ void registerStorageAzureBlob(StorageFactory & factory) return std::make_shared( std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings)), + std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), args.getContext(), args.table_id, args.columns, diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index d394c836369..b098cac5144 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -262,7 +262,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings)); + auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); } @@ -293,7 +293,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct StoragePtr storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings)), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), context, StorageID(getDatabaseName(), table_name), columns, diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index 
eee585967c2..1c3b302a186 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -40,7 +40,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( /// On worker node this filename won't contains globs storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings)), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), context, StorageID(getDatabaseName(), table_name), columns, @@ -55,7 +55,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( storage = std::make_shared( cluster_name, configuration, - std::make_unique(table_name, std::move(client), std::move(settings)), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, From b250acff789620be57e21977d8f3d4a3468070d5 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 5 Jan 2024 11:26:32 +0100 Subject: [PATCH 051/884] Fixed style check --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 9db5ddb476a..3399f1705f4 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -359,7 +359,7 @@ void copyAzureBlobStorageFile( bool for_disk_azure_blob_storage) { - if (settings->use_native_copy ) + if (settings->use_native_copy) { ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (for_disk_azure_blob_storage) From 356fc0aadb8f7c0f15f72c3b72955e1db7046e48 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 7 Jan 2024 14:49:24 +0100 Subject: [PATCH 052/884] Fix tests --- src/Storages/StorageView.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 2f7267e3701..1898e49de86 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -112,8 +112,14 @@ StorageView::StorageView( : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - if (is_parameterized_view_ && !query.isParameterizedView()) + if (is_parameterized_view_) + { + if (!query.isParameterizedView()) + storage_metadata.setColumns(columns_); + } + else storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); if (!query.select) From fd92c1961e5f09411d83b21c4fe9f00b78be22ba Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 7 Jan 2024 16:33:48 +0100 Subject: [PATCH 053/884] Fix clang tidy build --- src/Backups/BackupIO_AzureBlobStorage.cpp | 12 ++++++------ src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 10 +++++----- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 5ddbb42e2c0..8c6c1040eec 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -305,21 +305,21 @@ void BackupWriterAzureBlobStorage::removeFile(const String & file_name) object_storage->removeObjectIfExists(object); } -void BackupWriterAzureBlobStorage::removeFiles(const Strings & keys) +void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects 
objects; - for (const auto & key : keys) - objects.emplace_back(key); + for (const auto & file_name : file_names) + objects.emplace_back(file_name); object_storage->removeObjectsIfExist(objects); } -void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & keys) +void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; - for (const auto & key : keys) - objects.emplace_back(key); + for (const auto & file_name : file_names) + objects.emplace_back(file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 3399f1705f4..272be914cc1 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -65,11 +65,11 @@ namespace , schedule(schedule_) , for_disk_azure_blob_storage(for_disk_azure_blob_storage_) , log(log_) - , max_single_part_upload_size(settings_.get()->max_single_part_upload_size) + , max_single_part_upload_size(settings_->max_single_part_upload_size) { } - ~UploadHelper() {} + virtual ~UploadHelper() = default; protected: std::function()> create_read_buffer; @@ -114,9 +114,9 @@ namespace if (!total_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. This must not happen"); - auto max_part_number = settings.get()->max_part_number; - auto min_upload_part_size = settings.get()->min_upload_part_size; - auto max_upload_part_size = settings.get()->max_upload_part_size; + auto max_part_number = settings->max_part_number; + auto min_upload_part_size = settings->min_upload_part_size; + auto max_upload_part_size = settings->max_upload_part_size; if (!max_part_number) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index a6502541db1..b022151d32d 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -24,11 +24,11 @@ void copyAzureBlobStorageFile( MultiVersion & src_client, MultiVersion & dest_client, const String & src_container, - const String & src_path, + const String & src_blob, size_t src_offset, size_t src_size, const String & dest_container, - const String & dest_path, + const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, const std::optional> & object_metadata = std::nullopt, @@ -47,7 +47,7 @@ void copyDataToAzureBlobStorageFile( size_t size, MultiVersion & client, const String & dest_container, - const String & dest_bucket, + const String & dest_blob, std::shared_ptr settings, const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunner schedule_ = {}, From f50f7f56949021d01ba692f6788e50d411ca8af9 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 8 Jan 2024 14:25:33 +0100 Subject: [PATCH 054/884] Removed unwanted includes --- .../registerBackupEngineAzureBlobStorage.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index ef95206831f..810da5adb3f 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -10,13 +10,11 @@ #include #include #include -#include #endif namespace DB { -namespace fs = std::filesystem; namespace 
ErrorCodes { @@ -25,23 +23,6 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -#if USE_AZURE_BLOB_STORAGE -namespace -{ - String removeFileNameFromURL(String & url) - { - Poco::URI url2{url}; - String path = url2.getPath(); - size_t slash_pos = path.find_last_of('/'); - String file_name = path.substr(slash_pos + 1); - path.resize(slash_pos + 1); - url2.setPath(path); - url = url2.toString(); - return file_name; - } -} -#endif - void registerBackupEngineAzureBlobStorage(BackupFactory & factory) { From 2d914721e5101215c2c63c97151552cb7c8ff746 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 8 Jan 2024 15:10:37 +0100 Subject: [PATCH 055/884] Fix build --- .../registerBackupEngineAzureBlobStorage.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 810da5adb3f..3480ea75f1f 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #endif @@ -23,6 +24,22 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +#if USE_AZURE_BLOB_STORAGE +namespace +{ + String removeFileNameFromURL(String & url) + { + Poco::URI url2{url}; + String path = url2.getPath(); + size_t slash_pos = path.find_last_of('/'); + String file_name = path.substr(slash_pos + 1); + path.resize(slash_pos + 1); + url2.setPath(path); + url = url2.toString(); + return file_name; + } +} +#endif void registerBackupEngineAzureBlobStorage(BackupFactory & factory) { From c5bf722ee2d2b50d1b0691112b769e3e67612214 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jan 2024 21:24:44 +0300 Subject: [PATCH 056/884] Create ch/chc/chl symlinks by cmake as well (for develop mode) Before, they had been created only by install target. 
Follow-up for: #56634 Signed-off-by: Azat Khuzhin --- programs/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index b3a5af6d6c9..6e544bac81c 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -328,6 +328,10 @@ set (CLICKHOUSE_BUNDLE) if (ENABLE_CLICKHOUSE_SELF_EXTRACTING) list(APPEND CLICKHOUSE_BUNDLE self-extracting) endif () + +if (NOT BUILD_STANDALONE_KEEPER) + add_custom_target (ch ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse ch DEPENDS clickhouse) +endif() if (ENABLE_CLICKHOUSE_SERVER) add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse) install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) @@ -335,11 +339,13 @@ if (ENABLE_CLICKHOUSE_SERVER) endif () if (ENABLE_CLICKHOUSE_CLIENT) add_custom_target (clickhouse-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-client DEPENDS clickhouse) + add_custom_target (chc ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse chc DEPENDS clickhouse) install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-client) endif () if (ENABLE_CLICKHOUSE_LOCAL) add_custom_target (clickhouse-local ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-local DEPENDS clickhouse) + add_custom_target (chl ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse chl DEPENDS clickhouse) install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-local" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-local) endif () From 629d4b921e5cf2d709d2ca7a55658d95407e2ff7 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 9 Jan 2024 15:38:04 +0000 Subject: [PATCH 057/884] Fix style --- src/Analyzer/Passes/IfConstantConditionPass.cpp | 2 +- src/Storages/StorageMerge.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/IfConstantConditionPass.cpp b/src/Analyzer/Passes/IfConstantConditionPass.cpp index f3b8b712dbf..6b24eb1d539 100644 --- a/src/Analyzer/Passes/IfConstantConditionPass.cpp +++ b/src/Analyzer/Passes/IfConstantConditionPass.cpp @@ -57,7 +57,7 @@ public: } -void IfConstantConditionPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) +void IfConstantConditionPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { IfConstantConditionVisitor visitor(std::move(context)); visitor.visit(query_tree_node); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 15ca6e65482..ffbf98e85c7 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -92,7 +92,6 @@ namespace ErrorCodes extern const int SAMPLING_NOT_SUPPORTED; extern const int ALTER_OF_COLUMN_IS_FORBIDDEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int LOGICAL_ERROR; } StorageMerge::DatabaseNameOrRegexp::DatabaseNameOrRegexp( From c30736d415fcdaccb68a1c0e37e8c4de9242e014 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 12 Jan 2024 15:31:15 +0000 Subject: [PATCH 058/884] Cosmetics --- src/Storages/MergeTree/MutateTask.cpp | 8 +-- ...mn_must_not_override_past_values.reference | 33 ++++++++++++ ...e_column_must_not_override_past_values.sql | 53 +++++++++++++++++++ ..._column_not_override_past_values.reference | 29 ---------- 
...ialize_column_not_override_past_values.sql | 49 ----------------- 5 files changed, 90 insertions(+), 82 deletions(-) create mode 100644 tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference create mode 100644 tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql delete mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference delete mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index bb41608eb00..25fa45e7b68 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -76,8 +76,8 @@ static void splitAndModifyMutationCommands( { if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) { - /// For ordinary column with default expression, materialize column should not override past values - /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values + /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) { @@ -206,8 +206,8 @@ static void splitAndModifyMutationCommands( { if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) { - /// For ordinary column with default expression, materialize column should not override past values - /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values + /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) for_interpreter.push_back(command); diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference new file mode 100644 index 00000000000..a5a0370620b --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference @@ -0,0 +1,33 @@ +-- Compact parts +Origin +1 2 +2 54321 +After materialize +1 2 +2 54321 +-- Wide parts +Origin +1 2 +2 54321 +After materialize +1 2 +2 54321 +-- Nullable column != physically absent +Origin +1 2 +2 \N +3 54321 +After materialize +1 2 +2 \N +3 54321 +-- Parts with renamed column +Origin +1 2 +2 54321 +After rename +1 2 +2 54321 +After materialize +1 2 +2 54321 diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql new file mode 100644 index 00000000000..825c7eab048 --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql @@ -0,0 +1,53 @@ +SET mutations_sync = 2; + 
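+-- mutations_sync = 2 makes every ALTER TABLE ... MATERIALIZE COLUMN below wait for the mutation
+-- to finish, so the subsequent SELECTs always observe the rewritten parts.
+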
+SELECT '-- Compact parts'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id) VALUES (2); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Wide parts'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id) VALUES (2); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Nullable column != physically absent'; + +CREATE TABLE tab (id Int64, dflt Nullable(Int64) DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id, dflt) VALUES (2, NULL); +INSERT INTO tab (id) VALUES (3); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Parts with renamed column'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id) VALUES (2); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab RENAME COLUMN dflt TO dflt2; +SELECT 'After rename'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN bar; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference deleted file mode 100644 index 6b0d88bd09b..00000000000 --- a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference +++ /dev/null @@ -1,29 +0,0 @@ ---Origin-- -1 2 -2 54321 ---After materialize-- -1 2 -2 54321 ---Origin-- -1 2 -2 54321 ---After materialize-- -1 2 -2 54321 ---Origin-- -1 2 -2 \N -3 54321 ---After materialize-- -1 2 -2 \N -3 54321 ---Origin-- -1 2 -2 54321 ---After rename-- -1 2 -2 54321 ---After materialize-- -1 2 -2 54321 diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql deleted file mode 100644 index 1815661e097..00000000000 --- a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql +++ /dev/null @@ -1,49 +0,0 @@ - -SET mutations_sync = 2; --- Compact parts -CREATE TABLE test (id Int64, foo Int64 default 54321) ENGINE MergeTree ORDER BY id; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id ) values ( 2 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN foo; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; - --- Wide parts -CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id ) values ( 2 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN foo; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; - --- Nullable 
column != physically absent -CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id, foo ) values ( 2, NULL ); -INSERT INTO test ( id ) values ( 3 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN foo; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; - --- Parts with renamed column -CREATE TABLE test (id Int64, foo Int64 default 54321) ENGINE MergeTree ORDER BY id; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id ) values ( 2 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test RENAME COLUMN foo TO bar; -SELECT '--After rename--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN bar; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; \ No newline at end of file From ffde721f08359e0437c44026881e2514012a4966 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jan 2024 23:09:10 +0300 Subject: [PATCH 059/884] Update 02932_set_ttl_where.sql --- tests/queries/0_stateless/02932_set_ttl_where.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02932_set_ttl_where.sql b/tests/queries/0_stateless/02932_set_ttl_where.sql index bf2b317c4bf..ee8473e1af2 100644 --- a/tests/queries/0_stateless/02932_set_ttl_where.sql +++ b/tests/queries/0_stateless/02932_set_ttl_where.sql @@ -1,3 +1,5 @@ +-- Tags: no-ordinary-database + create or replace table t_temp ( a UInt32, timestamp DateTime @@ -12,3 +14,5 @@ select sleep(1); insert into t_temp select rand(), now() from system.numbers limit 1_000_000; select sleep(1); optimize table t_temp final; + +DROP TABLE t_temp; From 12585ea0e4cae1771ee6b51dd85a309e5923f12c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jan 2024 23:10:27 +0300 Subject: [PATCH 060/884] Update TTLDescription.cpp --- src/Storages/TTLDescription.cpp | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index bfd3afc30d8..3db5269b617 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -117,11 +117,6 @@ TTLDescription::TTLDescription(const TTLDescription & other) , if_exists(other.if_exists) , recompression_codec(other.recompression_codec) { - // if (other.expression) - // expression = other.expression->clone(); - - // if (other.where_expression) - // where_expression = other.where_expression->clone(); } TTLDescription & TTLDescription::operator=(const TTLDescription & other) @@ -135,11 +130,6 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) else expression_ast.reset(); - // if (other.expression) - // expression = other.expression->clone(); - // else - // expression.reset(); - expression_columns = other.expression_columns; result_column = other.result_column; @@ -148,11 +138,6 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) else where_expression_ast.reset(); - // if (other.where_expression) - // where_expression = other.where_expression->clone(); - // else - // where_expression.reset(); - where_expression_columns = other.where_expression_columns; where_result_column = other.where_result_column; group_by_keys = other.group_by_keys; @@ -179,7 +164,6 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType auto dag = 
analyzer.getActionsDAG(false); const auto * col = &dag->findInOutputs(ast->getColumnName()); - // std::cerr << "buildExpressionAndSets " << ttl_string << std::endl; if (col->result_name != ttl_string) col = &dag->addAlias(*col, ttl_string); @@ -189,10 +173,6 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType result.expression = std::make_shared(dag, ExpressionActionsSettings::fromContext(context)); result.sets = analyzer.getPreparedSets(); - // std::cerr << "--------- buildExpressionAndSets\n"; - // std::cerr << result.expression->dumpActions() << std::endl; - // std::cerr << result.sets->getSubqueries().size() << std::endl; - return result; } @@ -232,8 +212,6 @@ TTLDescription TTLDescription::getTTLFromAST( auto expression = buildExpressionAndSets(ttl_ast, columns.getAllPhysical(), context).expression; result.expression_columns = expression->getRequiredColumnsWithTypes(); - // auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical()); - // result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false); result.result_column = expression->getSampleBlock().safeGetByPosition(0).name; ExpressionActionsPtr where_expression; @@ -256,9 +234,6 @@ TTLDescription TTLDescription::getTTLFromAST( { result.where_expression_ast = where_expr_ast->clone(); where_expression = buildExpressionAndSets(where_expr_ast, columns.getAllPhysical(), context).expression; - // auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical()); - // result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false); - result.where_expression_columns = where_expression->getRequiredColumnsWithTypes(); result.where_result_column = where_expression->getSampleBlock().safeGetByPosition(0).name; } From 776ea26ce71287735897b00c65b47d73e8d9811c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jan 2024 02:45:51 +0300 Subject: [PATCH 061/884] Update PreparedSets.h --- src/Interpreters/PreparedSets.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 30bfda4700d..4f5ca337c5b 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -115,7 +115,6 @@ public: SetPtr buildSetInplace(const ContextPtr & context); std::unique_ptr build(const ContextPtr & context); - void buildSetInplace(const ContextPtr & context); QueryTreeNodePtr detachQueryTree() { return std::move(query_tree); } void setQueryPlan(std::unique_ptr source_); From 1afc5e8c01685d1bb3e86b5a0fff55618db517b0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jan 2024 03:45:42 +0100 Subject: [PATCH 062/884] Enable coverage for debug build --- docker/packager/packager | 8 ++++++++ tests/ci/build_check.py | 2 ++ tests/ci/ci_config.py | 2 ++ 3 files changed, 12 insertions(+) diff --git a/docker/packager/packager b/docker/packager/packager index ade36a55591..4c443896f4a 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -121,6 +121,7 @@ def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bo def parse_env_variables( debug_build: bool, + coverage: bool, compiler: str, sanitizer: str, package_type: str, @@ -287,6 +288,9 @@ def parse_env_variables( else: result.append("BUILD_TYPE=None") + if coverage: + result.append("SANITIZE_COVERAGE=1") + if not cache: cmake_flags.append("-DCOMPILER_CACHE=disabled") @@ -415,6 +419,9 @@ def parse_args() -> 
argparse.Namespace: choices=("address", "thread", "memory", "undefined", ""), default="", ) + parser.add_argument( + "--coverage", action="store_true", help="enable granular coverage with introspection" + ) parser.add_argument("--clang-tidy", action="store_true") parser.add_argument( @@ -507,6 +514,7 @@ def main() -> None: env_prepared = parse_env_variables( args.debug_build, + args.coverage, args.compiler, args.sanitizer, args.package_type, diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 27243aac4f1..fe4308f5933 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -73,6 +73,8 @@ def get_packager_cmd( cmd += " --debug-build" if build_config.sanitizer: cmd += f" --sanitizer={build_config.sanitizer}" + if build_config.coverage: + cmd += " --coverage" if build_config.tidy: cmd += " --clang-tidy" diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index e3319fe4a72..b8dff3f0a28 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -62,6 +62,7 @@ class BuildConfig: package_type: Literal["deb", "binary", "fuzzers"] additional_pkgs: bool = False debug_build: bool = False + coverage: bool = False sanitizer: str = "" tidy: bool = False sparse_checkout: bool = False @@ -473,6 +474,7 @@ CI_CONFIG = CiConfig( name="package_debug", compiler="clang-17", debug_build=True, + coverage=True, package_type="deb", sparse_checkout=True, ), From 0219d58d925bd3f7901f9251c2abca76c1ae00dc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 14 Jan 2024 02:56:50 +0000 Subject: [PATCH 063/884] Automatic style fix --- docker/packager/packager | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 4c443896f4a..3e7f1ba447e 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -420,7 +420,9 @@ def parse_args() -> argparse.Namespace: default="", ) parser.add_argument( - "--coverage", action="store_true", help="enable granular coverage with introspection" + "--coverage", + action="store_true", + help="enable granular coverage with introspection", ) parser.add_argument("--clang-tidy", action="store_true") From 6405decbb0ad0e80fe20b22a9956481abbe3b479 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jan 2024 05:14:54 +0100 Subject: [PATCH 064/884] Fix Python --- docker/packager/packager | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 4c443896f4a..2e2b6550636 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -289,7 +289,7 @@ def parse_env_variables( result.append("BUILD_TYPE=None") if coverage: - result.append("SANITIZE_COVERAGE=1") + cmake_flags.append("-DSANITIZE_COVERAGE=1") if not cache: cmake_flags.append("-DCOMPILER_CACHE=disabled") From 9f5a7c51175dc3d4cfe46065b4912e7973a30983 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jan 2024 08:05:57 +0100 Subject: [PATCH 065/884] Fix error --- cmake/sanitize.cmake | 1 + contrib/jemalloc-cmake/CMakeLists.txt | 3 +++ 2 files changed, 4 insertions(+) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 3f7a8498059..3882b51227e 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -82,3 +82,4 @@ if (SANITIZE_COVERAGE) endif() set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") +set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table) diff 
--git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 15e965ed841..f85a38dcf8a 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -161,6 +161,9 @@ target_include_directories(_jemalloc SYSTEM PRIVATE target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) +# Because our coverage callbacks call malloc, and recursive call of malloc could not work. +target_compile_options(_jemalloc PRIVATE ${WITHOUT_COVERAGE_FLAGS_LIST}) + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_DEBUG=1 From 3d904cbf81eb6ce2472eabdcd0be5f6955984ce5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jan 2024 08:09:08 +0100 Subject: [PATCH 066/884] Slightly better --- base/base/coverage.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index d70c3bcd82b..ac8055e836c 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -1,4 +1,5 @@ #include "coverage.h" +#include #pragma GCC diagnostic ignored "-Wreserved-identifier" @@ -57,6 +58,14 @@ namespace uintptr_t * all_addresses_array = nullptr; size_t all_addresses_array_size = 0; + + uintptr_t * allocate(size_t size) + { + void * map = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (MAP_FAILED == map) + return nullptr; + return static_cast(map); + } } extern "C" @@ -79,7 +88,7 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop) coverage_array_size = stop - start; /// Note: we will leak this. - coverage_array = static_cast(malloc(sizeof(uintptr_t) * coverage_array_size)); + coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); resetCoverage(); } @@ -92,7 +101,7 @@ void __sanitizer_cov_pcs_init(const uintptr_t * pcs_begin, const uintptr_t * pcs return; pc_table_initialized = true; - all_addresses_array = static_cast(malloc(sizeof(uintptr_t) * coverage_array_size)); + all_addresses_array = allocate(sizeof(uintptr_t) * coverage_array_size); all_addresses_array_size = pcs_end - pcs_begin; /// They are not a real pointers, but also contain a flag in the most significant bit, From 33d9a1d4e83d58f15e36ea6e88908c8410f03c40 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jan 2024 22:03:47 +0100 Subject: [PATCH 067/884] Documentation --- src/Functions/coverage.cpp | 48 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/src/Functions/coverage.cpp b/src/Functions/coverage.cpp index 8a62469fa54..86de047a76b 100644 --- a/src/Functions/coverage.cpp +++ b/src/Functions/coverage.cpp @@ -85,8 +85,52 @@ public: REGISTER_FUNCTION(Coverage) { - factory.registerFunction("coverage", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Current)); }); - factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::All)); }); + factory.registerFunction("coverage", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Current)); }, + FunctionDocumentation + { + .description=R"( +This function is only available if ClickHouse was built with the SANITIZE_COVERAGE=1 option. + +It returns an array of unique addresses (a subset of the instrumented points in code) in the code +encountered at runtime after the previous coverage reset (with the `SYSTEM RESET COVERAGE` query) or after server startup. 
+ +[example:functions] + +The order of array elements is undetermined. + +You can use another function, `coverageAll` to find all instrumented addresses in the code to compare and calculate the percentage. + +You can process the addresses with the `addressToSymbol` (possibly with `demangle`) and `addressToLine` functions +to calculate symbol-level, file-level, or line-level coverage. + +If you run multiple tests sequentially and reset the coverage with the `SYSTEM RESET COVERAGE` query between the tests, +you can obtain a coverage information for every test in isolation, to find which functions are covered by which tests and vise-versa. + +By default, every *basic block* in the code is covered, which roughly means - a sequence of instructions without jumps, +e.g. a body of for loop without ifs, or a single branch of if. + +See https://clang.llvm.org/docs/SanitizerCoverage.html for more information. +)", + .examples{ + {"functions", "SELECT DISTINCT demangle(addressToSymbol(arrayJoin(coverage())))", ""}}, + .categories{"Introspection"} + }); + + factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::All)); }, + FunctionDocumentation + { + .description=R"( +This function is only available if ClickHouse was built with the SANITIZE_COVERAGE=1 option. + +It returns an array of all unique addresses in the code instrumented for coverage +- all possible addresses that can appear in the result of the `coverage` function. + +You can use this function, and the `coverage` function to compare and calculate the coverage percentage. + +See the `coverage` function for the details. +)", + .categories{"Introspection"} + }); } } From 3bd2c7e384d07d07da8768aa4708c7726b828db5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jan 2024 22:06:25 +0100 Subject: [PATCH 068/884] Report coverage if available --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c7049b0e0c8..2d278f18176 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2840,7 +2840,7 @@ def parse_args(): parser.add_argument( "--collect-per-test-coverage", action="store_true", - default=False, + default=True, help="Create `system.coverage` table on the server and collect information about low-level code coverage on a per test basis there", ) parser.add_argument( From 9141e1693f03f39d2eda37423918d2b2d873877a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:24:17 +0100 Subject: [PATCH 069/884] Calculate cumulative coverage by default. --- tests/clickhouse-test | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 2d278f18176..f1b20a3a43e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1322,6 +1322,7 @@ class TestCase: # We want to calculate per-test code coverage. That's why we reset it before each test. if ( args.collect_per_test_coverage + and args.reset_coverage_before_every_test and BuildFlags.SANITIZE_COVERAGE in args.build_flags ): clickhouse_execute( @@ -2843,6 +2844,12 @@ def parse_args(): default=True, help="Create `system.coverage` table on the server and collect information about low-level code coverage on a per test basis there", ) + parser.add_argument( + "--reset-coverage-before-every-test", + action="store_true", + default=False, + help="Collect isolated test coverage for every test instead of a cumulative. 
Useful only when tests are run sequentially.", + ) parser.add_argument( "--report-logs-stats", action="store_true", From f7abeff0857ec231a7107d2a006b5f98b60a689f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:25:07 +0100 Subject: [PATCH 070/884] Slightly better reporting --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f1b20a3a43e..e480957e5f4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1259,7 +1259,7 @@ class TestCase: retry_error_codes=True, ).decode() - description_full += f" Coverage: {coverage}" + description_full += f" (coverage: {coverage})" description_full += "\n" From 3e09feda336a355173b46ec85a9cd86d640f3348 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:36:07 +0100 Subject: [PATCH 071/884] More functions --- base/base/coverage.cpp | 21 +++++++++++++++------ base/base/coverage.h | 5 ++++- src/Functions/coverage.cpp | 33 ++++++++++++++++++++++++++++----- tests/clickhouse-test | 6 +++--- 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index ac8055e836c..499e384d21f 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -53,7 +53,8 @@ namespace uint32_t * guards_start = nullptr; uint32_t * guards_end = nullptr; - uintptr_t * coverage_array = nullptr; + uintptr_t * current_coverage_array = nullptr; + uintptr_t * cumulative_coverage_array = nullptr; size_t coverage_array_size = 0; uintptr_t * all_addresses_array = nullptr; @@ -88,7 +89,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop) coverage_array_size = stop - start; /// Note: we will leak this. - coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); + current_coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); + cumulative_coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); resetCoverage(); } @@ -126,15 +128,22 @@ void __sanitizer_cov_trace_pc_guard(uint32_t * guard) /// and use them to dereference an array or a bit vector. void * pc = __builtin_return_address(0); - coverage_array[guard - guards_start] = reinterpret_cast(pc); + current_coverage_array[guard - guards_start] = reinterpret_cast(pc); + cumulative_coverage_array[guard - guards_start] = reinterpret_cast(pc); } } __attribute__((no_sanitize("coverage"))) -std::span getCoverage() +std::span getCurrentCoverage() { - return {coverage_array, coverage_array_size}; + return {current_coverage_array, coverage_array_size}; +} + +__attribute__((no_sanitize("coverage"))) +std::span getCumulativeCoverage() +{ + return {cumulative_coverage_array, coverage_array_size}; } __attribute__((no_sanitize("coverage"))) @@ -146,7 +155,7 @@ std::span getAllInstrumentedAddresses() __attribute__((no_sanitize("coverage"))) void resetCoverage() { - memset(coverage_array, 0, coverage_array_size * sizeof(*coverage_array)); + memset(current_coverage_array, 0, coverage_array_size * sizeof(*current_coverage_array)); /// The guard defines whether the __sanitizer_cov_trace_pc_guard should be called. /// For example, you can unset it after first invocation to prevent excessive work. 
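The change above keeps two counter arrays: a resettable one served by coverageCurrent() and a cumulative one served by coverageCumulative(), while coverageAll() lists every instrumented point; the per-test logic added to clickhouse-test is essentially a reset, run, measure loop over these functions. The following Python sketch shows that loop driven over the HTTP interface. It is an illustration only, assuming a server built with -DSANITIZE_COVERAGE=1 listening on localhost:8123; the URL and the sample query are placeholders rather than values taken from the patch.

    import urllib.request


    def query(sql: str) -> str:
        """POST a statement to the ClickHouse HTTP interface and return the raw text result."""
        req = urllib.request.Request("http://localhost:8123/", data=sql.encode("utf-8"))
        with urllib.request.urlopen(req) as resp:
            return resp.read().decode("utf-8").strip()


    def coverage_percent_for(test_sql: str) -> float:
        """Run one statement in isolation and report the share of instrumented points it hit."""
        query("SYSTEM RESET COVERAGE")  # clear the resettable array, as clickhouse-test does between tests
        query(test_sql)
        covered = int(query("SELECT length(coverageCurrent())"))
        total = int(query("SELECT length(coverageAll())"))
        return 100.0 * covered / total if total else 0.0


    print(f"{coverage_percent_for('SELECT sum(number) FROM numbers(1000)'):.2f}% of instrumented points hit")

Note that SYSTEM RESET COVERAGE clears only the current array, so coverageCumulative() keeps growing for the lifetime of the server process.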
diff --git a/base/base/coverage.h b/base/base/coverage.h index f75ed2d3553..a6e5a6848d7 100644 --- a/base/base/coverage.h +++ b/base/base/coverage.h @@ -15,7 +15,10 @@ void dumpCoverageReportIfPossible(); /// Get accumulated unique program addresses of the instrumented parts of the code, /// seen so far after program startup or after previous reset. /// The returned span will be represented as a sparse map, containing mostly zeros, which you should filter away. -std::span getCoverage(); +std::span getCurrentCoverage(); + +/// Similar but not being reset. +std::span getCumulativeCoverage(); /// Get all instrumented addresses that could be in the coverage. std::span getAllInstrumentedAddresses(); diff --git a/src/Functions/coverage.cpp b/src/Functions/coverage.cpp index 86de047a76b..f4cac26df78 100644 --- a/src/Functions/coverage.cpp +++ b/src/Functions/coverage.cpp @@ -21,11 +21,14 @@ namespace enum class Kind { Current, + Cumulative, All }; /** If ClickHouse is build with coverage instrumentation, returns an array - * of currently accumulated (`coverage`) / all possible (`coverageAll`) unique code addresses. + * of currently accumulated (`coverageCurrent`) + * or accumulated since the startup (`coverageCumulative`) + * or all possible (`coverageAll`) unique code addresses. */ class FunctionCoverage : public IFunction { @@ -64,7 +67,11 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { - auto coverage_table = kind == Kind::Current ? getCoverage() : getAllInstrumentedAddresses(); + auto coverage_table = kind == Kind::Current + ? getCurrentCoverage() + : (kind == Kind::Cumulative + ? getCumulativeCoverage() + : getAllInstrumentedAddresses()); auto column_addresses = ColumnUInt64::create(); auto & data = column_addresses->getData(); @@ -85,7 +92,7 @@ public: REGISTER_FUNCTION(Coverage) { - factory.registerFunction("coverage", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Current)); }, + factory.registerFunction("coverageCurrent", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Current)); }, FunctionDocumentation { .description=R"( @@ -112,7 +119,23 @@ e.g. a body of for loop without ifs, or a single branch of if. See https://clang.llvm.org/docs/SanitizerCoverage.html for more information. )", .examples{ - {"functions", "SELECT DISTINCT demangle(addressToSymbol(arrayJoin(coverage())))", ""}}, + {"functions", "SELECT DISTINCT demangle(addressToSymbol(arrayJoin(coverageCurrent())))", ""}}, + .categories{"Introspection"} + }); + + factory.registerFunction("coverageCumulative", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Cumulative)); }, + FunctionDocumentation + { + .description=R"( +This function is only available if ClickHouse was built with the SANITIZE_COVERAGE=1 option. + +It returns an array of unique addresses (a subset of the instrumented points in code) in the code +encountered at runtime after server startup. + +In contrast to `coverageCurrent` it cannot be reset with the `SYSTEM RESET COVERAGE`. + +See the `coverageCurrent` function for the details. +)", .categories{"Introspection"} }); @@ -127,7 +150,7 @@ It returns an array of all unique addresses in the code instrumented for coverag You can use this function, and the `coverage` function to compare and calculate the coverage percentage. -See the `coverage` function for the details. +See the `coverageCurrent` function for the details. 
)", .categories{"Introspection"} }); diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e480957e5f4..a39c90947ba 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1249,13 +1249,13 @@ class TestCase: ): clickhouse_execute( args, - f"INSERT INTO system.coverage SELECT now(), '{self.case}', coverage()", + f"INSERT INTO system.coverage SELECT now(), '{self.case}', coverageCurrent()", retry_error_codes=True, ) coverage = clickhouse_execute( args, - "SELECT length(coverage())", + "SELECT length(coverageCurrent())", retry_error_codes=True, ).decode() @@ -2460,7 +2460,7 @@ def main(args): # Coverage collected at the system startup before running any tests: clickhouse_execute( args, - "INSERT INTO system.coverage SELECT now(), '', coverage()", + "INSERT INTO system.coverage SELECT now(), '', coverageCurrent()", ) total_tests_run = 0 From e4cd02ea39642dd9b8d519aee0426b752423c3bf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:36:24 +0100 Subject: [PATCH 072/884] Fix typo --- src/IO/OpenedFile.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/IO/OpenedFile.h b/src/IO/OpenedFile.h index 10c36d9e1d3..4c4de2265bc 100644 --- a/src/IO/OpenedFile.h +++ b/src/IO/OpenedFile.h @@ -21,7 +21,7 @@ public: OpenedFile(const std::string & file_name_, int flags_); ~OpenedFile(); - /// Close prematurally. + /// Close prematurely. void close(); int getFD() const; @@ -40,4 +40,3 @@ private: }; } - From 30c362909089d6f7fe93b639dfdf1666d5bcfc7c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:36:44 +0100 Subject: [PATCH 073/884] An option to dump coverage to a file at exit --- programs/main.cpp | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/programs/main.cpp b/programs/main.cpp index 7d07112de66..4852ed8990e 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ #include #include +#include /// Universal executable for various clickhouse applications @@ -512,6 +514,49 @@ int main(int argc_, char ** argv_) if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-')) main_func = mainEntryClickHouseLocal; - return main_func(static_cast(argv.size()), argv.data()); + int exit_code = main_func(static_cast(argv.size()), argv.data()); + +#if defined(SANITIZE_COVERAGE) + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for two filenames: + /// 'prefix.covered' and 'prefix.all' which will contain + /// the list of addresses of covered and all instrumented addresses, respectively. + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dumpCoverage = [](const std::string & name, auto span) + { + /// Write only non-zeros. 
+ std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dumpCoverage(coverage_filename_prefix + std::string(".covered"), getCumulativeCoverage()); + dumpCoverage(coverage_filename_prefix + std::string(".all"), getAllInstrumentedAddresses()); + } +#endif + + return exit_code; } #endif From fe952fb64c460c260c77336142b5eb4bd05b46d8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:38:42 +0100 Subject: [PATCH 074/884] Rename to system.coverage_log to simplify export --- tests/clickhouse-test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a39c90947ba..eb85bdff0f5 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1249,7 +1249,7 @@ class TestCase: ): clickhouse_execute( args, - f"INSERT INTO system.coverage SELECT now(), '{self.case}', coverageCurrent()", + f"INSERT INTO system.coverage_log SELECT now(), '{self.case}', coverageCurrent()", retry_error_codes=True, ) @@ -2448,7 +2448,7 @@ def main(args): clickhouse_execute( args, """ - CREATE TABLE IF NOT EXISTS system.coverage + CREATE TABLE IF NOT EXISTS system.coverage_log ( time DateTime, test_name String, @@ -2460,7 +2460,7 @@ def main(args): # Coverage collected at the system startup before running any tests: clickhouse_execute( args, - "INSERT INTO system.coverage SELECT now(), '', coverageCurrent()", + "INSERT INTO system.coverage_log SELECT now(), '', coverageCurrent()", ) total_tests_run = 0 @@ -2842,7 +2842,7 @@ def parse_args(): "--collect-per-test-coverage", action="store_true", default=True, - help="Create `system.coverage` table on the server and collect information about low-level code coverage on a per test basis there", + help="Create `system.coverage_log` table on the server and collect information about low-level code coverage on a per test basis there", ) parser.add_argument( "--reset-coverage-before-every-test", From 7662628393f97dd1c094b3346cc55c71f10ad193 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:41:14 +0100 Subject: [PATCH 075/884] Export coverage to the CI database --- docker/test/base/setup_export_logs.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index ea82e071112..659bf29b057 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -129,6 +129,19 @@ function setup_logs_replication debug_or_sanitizer_build=$(clickhouse-client -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%'") echo "Build is debug or sanitizer: $debug_or_sanitizer_build" + # We will pre-create a table system.coverage_log. 
+ # It is normally created by clickhouse-test rather than the server, + # so we will create it in advance to make it be picked up by the next commands: + + clickhouse-client --query " + CREATE TABLE IF NOT EXISTS system.coverage_log + ( + time DateTime, + test_name String, + coverage Array(UInt64) + ) ENGINE = MergeTree ORDER BY test_name + " + # For each system log table: echo 'Create %_log tables' clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table From 97200e2c5d65693ad5d1e6a7c7dea3d5cac0e23d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:46:01 +0100 Subject: [PATCH 076/884] Symbolization --- docker/test/base/setup_export_logs.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 659bf29b057..e141bc00a77 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -138,7 +138,8 @@ function setup_logs_replication ( time DateTime, test_name String, - coverage Array(UInt64) + coverage Array(UInt64), + symbols Array(LowCardinality(String)) MATERIALIZED arrayMap(x -> demangle(addressToSymbol(x)), coverage) ) ENGINE = MergeTree ORDER BY test_name " From bf2e5748575ad2eb74eb057e0ee242a149edecdb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 00:48:47 +0100 Subject: [PATCH 077/884] Symbolization --- docker/test/base/setup_export_logs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index e141bc00a77..20dd864318f 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -188,7 +188,7 @@ function setup_logs_replication echo "Creating table system.${table}_sender" >&2 # Create Distributed table and materialized view to watch on the original table: - clickhouse-client --query " + clickhouse-client --asterisk_include_materialized_columns 1 --query " CREATE TABLE system.${table}_sender ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash}) SETTINGS flush_on_detach=0 @@ -199,7 +199,7 @@ function setup_logs_replication echo "Creating materialized view system.${table}_watcher" >&2 - clickhouse-client --query " + clickhouse-client --asterisk_include_materialized_columns 1 --query " CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, * FROM system.${table} From c5dfae1bcade85289b78f0bb760c92bcee078743 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 05:07:23 +0100 Subject: [PATCH 078/884] Fix error --- docker/test/base/setup_export_logs.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 20dd864318f..26fcd10d666 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -23,6 +23,10 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "} EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), " EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> demangle(addressToSymbol(x)), trace)::Array(LowCardinality(String)) AS symbols, arrayMap(x -> addressToLine(x), trace)::Array(LowCardinality(String)) AS lines" +# coverage_log needs more columns for symbolization, but only symbol names 
(the line numbers are too heavy to calculate)
+EXTRA_COLUMNS_COVERAGE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), "
+EXTRA_COLUMNS_EXPRESSION_COVERAGE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> demangle(addressToSymbol(x)), coverage)::Array(LowCardinality(String)) AS symbols"
+
 
 function __set_connection_args
 {
@@ -138,8 +142,7 @@ function setup_logs_replication
         (
             time DateTime,
             test_name String,
-            coverage Array(UInt64),
-            symbols Array(LowCardinality(String)) MATERIALIZED arrayMap(x -> demangle(addressToSymbol(x)), coverage)
+            coverage Array(UInt64)
         ) ENGINE = MergeTree ORDER BY test_name
     "
 
@@ -158,7 +161,10 @@ function setup_logs_replication
             else
                 EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_TRACE_LOG}"
             fi
-        else
+        elif [[ "$table" = "coverage_log" ]]; then
+            EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS_COVERAGE_LOG}"
+            EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_COVERAGE_LOG}"
+        else
             EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS}"
             EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION}"
         fi

From e13ca48bce836a2534047e59a4e922395a8f6a87 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jan 2024 05:13:43 +0100
Subject: [PATCH 079/884] Better dump on exit

---
 programs/main.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/programs/main.cpp b/programs/main.cpp
index 4852ed8990e..8958d84e243 100644
--- a/programs/main.cpp
+++ b/programs/main.cpp
@@ -521,9 +521,8 @@ int main(int argc_, char ** argv_)
     /// This is useful for non-server applications such as clickhouse-format or clickhouse-client,
     /// that cannot introspect it with SQL functions at runtime.
 
-    /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for two filenames:
-    /// 'prefix.covered' and 'prefix.all' which will contain
-    /// the list of addresses of covered and all instrumented addresses, respectively.
+    /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid'
+    /// containing the list of addresses of covered code.
 
     /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header.
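Such dumps are easy to post-process outside ClickHouse. Below is a small Python sketch, not part of the patch, that decodes one; the filename is a hypothetical example of the 'prefix.pid' scheme described above, and the addresses it prints can later be symbolized with addressToSymbol on a server running the same binary.

    import struct
    import sys


    def read_coverage_dump(path: str) -> list:
        """Return the covered addresses stored as raw 64-bit integers, native byte order, no header."""
        with open(path, "rb") as f:
            payload = f.read()
        count = len(payload) // 8
        return list(struct.unpack(f"={count}Q", payload))  # '=' keeps the writer's native byte order


    if __name__ == "__main__":
        addresses = read_coverage_dump(sys.argv[1] if len(sys.argv) > 1 else "coverage.12345")
        print(f"{len(addresses)} covered addresses; first few: {[hex(a) for a in addresses[:5]]}")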
@@ -552,8 +551,7 @@ int main(int argc_, char ** argv_) } }; - dumpCoverage(coverage_filename_prefix + std::string(".covered"), getCumulativeCoverage()); - dumpCoverage(coverage_filename_prefix + std::string(".all"), getAllInstrumentedAddresses()); + dumpCoverage(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); } #endif From e49cfbef089499a457c8793724629e2e94c8dc37 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 05:40:03 +0100 Subject: [PATCH 080/884] Coverage for non-server tools --- tests/clickhouse-test | 23 +++++++++++++++++++++++ tests/queries/shell_config.sh | 4 ++++ 2 files changed, 27 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index eb85bdff0f5..bd796dbfdf2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -12,6 +12,7 @@ import itertools import sys import os import os.path +import glob import platform import signal import re @@ -74,6 +75,10 @@ def stringhash(s): # only during process invocation https://stackoverflow.com/a/42089311 return zlib.crc32(s.encode("utf-8")) +def read_file_as_binary_string(file_path): + with open(file_path, 'rb') as file: + binary_data = file.read() + return binary_data # First and last lines of the log def trim_for_log(s): @@ -101,6 +106,7 @@ class HTTPError(Exception): def clickhouse_execute_http( base_args, query, + body=None, timeout=30, settings=None, default_format=None, @@ -140,6 +146,7 @@ def clickhouse_execute_http( client.request( "POST", f"/?{base_args.client_options_query_str}{urllib.parse.urlencode(params)}", + body=body ) res = client.getresponse() data = res.read() @@ -160,6 +167,7 @@ def clickhouse_execute_http( def clickhouse_execute( base_args, query, + body=None, timeout=30, settings=None, max_http_retries=5, @@ -168,6 +176,7 @@ def clickhouse_execute( return clickhouse_execute_http( base_args, query, + body, timeout, settings, max_http_retries=max_http_retries, @@ -181,6 +190,7 @@ def clickhouse_execute_json( data = clickhouse_execute_http( base_args, query, + None, timeout, settings, "JSONEachRow", @@ -1253,6 +1263,19 @@ class TestCase: retry_error_codes=True, ) + # Check for dumped coverage files + file_pattern = "coverage.*" + matching_files = glob.glob(file_pattern) + for file_path in matching_files: + body = read_file_as_binary_string(file_path) + clickhouse_execute( + args, + f"INSERT INTO system.coverage_log SELECT now(), '{self.case}', groupArray(data) FROM input('data UInt64') FORMAT RowBinary", + body=body, + retry_error_codes=True, + ) + os.remove(file_path) + coverage = clickhouse_execute( args, "SELECT length(coverageCurrent())", diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index c687a63623f..614bfcece8f 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -4,6 +4,10 @@ # Don't check for ODR violation, since we may test shared build with ASAN export ASAN_OPTIONS=detect_odr_violation=0 +# If ClickHouse was built with coverage - dump the coverage information at exit +# (in other cases this environment variable has no effect) +export CLICKHOUSE_WRITE_COVERAGE="coverage" + export CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE:="test"} export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL:="warning"} From 678a32cedee768b6c1a6748e96a0d103e853d8bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 06:12:58 +0100 Subject: [PATCH 081/884] Obey Python's quirky formatter --- tests/integration/ci-runner.py | 13 +- 
.../test_async_insert_memory/test.py | 2 +- tests/integration/test_check_table/test.py | 76 +++-- .../test_cluster_discovery/test.py | 2 +- .../test_ldap_external_user_directory/test.py | 26 +- tests/integration/test_mysql_protocol/test.py | 16 +- tests/integration/test_partition/test.py | 4 +- .../test_replicated_database/test.py | 9 +- .../test.py | 9 +- .../s3_mocks/unstable_server.py | 2 +- tests/integration/test_storage_s3/test.py | 17 +- tests/integration/test_storage_url/test.py | 22 +- tests/integration/test_system_merges/test.py | 45 ++- utils/grpc-client/pb2/clickhouse_grpc_pb2.py | 271 ++++++++++-------- .../pb2/clickhouse_grpc_pb2_grpc.py | 237 +++++++++------ 15 files changed, 433 insertions(+), 318 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 7c922e339fe..d54ed2bb767 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -305,14 +305,11 @@ class ClickhouseIntegrationTestsRunner: def _pre_pull_images(self, repo_path): image_cmd = self._get_runner_image_cmd(repo_path) - cmd = ( - "cd {repo_path}/tests/integration && " - "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( - repo_path=repo_path, - runner_opts=self._get_runner_opts(), - image_cmd=image_cmd, - command=r""" echo Pre Pull finished """, - ) + cmd = "cd {repo_path}/tests/integration && " "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( + repo_path=repo_path, + runner_opts=self._get_runner_opts(), + image_cmd=image_cmd, + command=r""" echo Pre Pull finished """, ) for i in range(5): diff --git a/tests/integration/test_async_insert_memory/test.py b/tests/integration/test_async_insert_memory/test.py index 5d2e5503680..f897007f7bb 100644 --- a/tests/integration/test_async_insert_memory/test.py +++ b/tests/integration/test_async_insert_memory/test.py @@ -43,7 +43,7 @@ def test_memory_usage(): response = node.get_query_request( "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user={}".format( - 30 * (2**23) + 30 * (2 ** 23) ), user="A", ) diff --git a/tests/integration/test_check_table/test.py b/tests/integration/test_check_table/test.py index 021977fb6b6..ebf404e698b 100644 --- a/tests/integration/test_check_table/test.py +++ b/tests/integration/test_check_table/test.py @@ -95,15 +95,25 @@ def test_check_normal_table_corruption(started_cluster, merge_tree_settings): node1, "non_replicated_mt", "201902_1_1_0", database="default" ) - assert node1.query( - "CHECK TABLE non_replicated_mt", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"] + assert ( + node1.query( + "CHECK TABLE non_replicated_mt", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ) + .strip() + .split("\t")[0:2] + == ["201902_1_1_0", "0"] + ) - assert node1.query( - "CHECK TABLE non_replicated_mt", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"] + assert ( + node1.query( + "CHECK TABLE non_replicated_mt", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ) + .strip() + .split("\t")[0:2] + == ["201902_1_1_0", "0"] + ) node1.query( "INSERT INTO non_replicated_mt VALUES (toDate('2019-01-01'), 1, 10), (toDate('2019-01-01'), 2, 12)" @@ -123,10 +133,15 @@ def test_check_normal_table_corruption(started_cluster, merge_tree_settings): remove_checksums_on_disk(node1, 
"default", "non_replicated_mt", "201901_2_2_0") - assert node1.query( - "CHECK TABLE non_replicated_mt PARTITION 201901", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ).strip().split("\t")[0:2] == ["201901_2_2_0", "0"] + assert ( + node1.query( + "CHECK TABLE non_replicated_mt PARTITION 201901", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ) + .strip() + .split("\t")[0:2] + == ["201901_2_2_0", "0"] + ) @pytest.mark.parametrize("merge_tree_settings, zk_path_suffix", [("", "_0")]) @@ -194,12 +209,15 @@ def test_check_replicated_table_simple( == "201901_0_0_0\t1\t\n" ) - assert sorted( - node2.query( - "CHECK TABLE replicated_mt", - settings={"check_query_single_value_result": 0}, - ).split("\n") - ) == ["", "201901_0_0_0\t1\t", "201902_0_0_0\t1\t"] + assert ( + sorted( + node2.query( + "CHECK TABLE replicated_mt", + settings={"check_query_single_value_result": 0}, + ).split("\n") + ) + == ["", "201901_0_0_0\t1\t", "201902_0_0_0\t1\t"] + ) with pytest.raises(QueryRuntimeException) as exc: node2.query( @@ -273,10 +291,13 @@ def test_check_replicated_table_corruption( ) node1.query_with_retry("SYSTEM SYNC REPLICA replicated_mt_1") - assert node1.query( - "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ) == "{}\t1\t\n".format(part_name) + assert ( + node1.query( + "CHECK TABLE replicated_mt_1 PARTITION 201901", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ) + == "{}\t1\t\n".format(part_name) + ) assert node1.query("SELECT count() from replicated_mt_1") == "4\n" remove_part_from_disk(node2, "replicated_mt_1", part_name) @@ -288,10 +309,13 @@ def test_check_replicated_table_corruption( ) node1.query("SYSTEM SYNC REPLICA replicated_mt_1") - assert node1.query( - "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ) == "{}\t1\t\n".format(part_name) + assert ( + node1.query( + "CHECK TABLE replicated_mt_1 PARTITION 201901", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ) + == "{}\t1\t\n".format(part_name) + ) assert node1.query("SELECT count() from replicated_mt_1") == "4\n" diff --git a/tests/integration/test_cluster_discovery/test.py b/tests/integration/test_cluster_discovery/test.py index ad3deb5b142..a2e7e15b956 100644 --- a/tests/integration/test_cluster_discovery/test.py +++ b/tests/integration/test_cluster_discovery/test.py @@ -61,7 +61,7 @@ def check_on_cluster( print(f"Retry {retry}/{retries} unsuccessful, result: {node_results}") if retry != retries: - time.sleep(2**retry) + time.sleep(2 ** retry) else: msg = msg or f"Wrong '{what}' result" raise Exception( diff --git a/tests/integration/test_ldap_external_user_directory/test.py b/tests/integration/test_ldap_external_user_directory/test.py index 39753794d63..c9642c293ee 100644 --- a/tests/integration/test_ldap_external_user_directory/test.py +++ b/tests/integration/test_ldap_external_user_directory/test.py @@ -76,11 +76,14 @@ def test_role_mapping(ldap_cluster): "select currentUser()", user="johndoe", password="qwertz" ) == TSV([["johndoe"]]) - assert instance.query( - "select role_name from system.current_roles ORDER BY role_name", - user="johndoe", - password="qwertz", - ) == TSV([["role_1"], ["role_2"]]) + assert ( + instance.query( + "select role_name from system.current_roles ORDER BY role_name", + user="johndoe", + password="qwertz", + ) + == TSV([["role_1"], ["role_2"]]) + ) instance.query("CREATE 
ROLE role_3") add_ldap_group(ldap_cluster, group_cn="clickhouse-role_3", member_cn="johndoe") @@ -88,8 +91,11 @@ def test_role_mapping(ldap_cluster): # See https://github.com/ClickHouse/ClickHouse/issues/54318 add_ldap_group(ldap_cluster, group_cn="clickhouse-role_4", member_cn="johndoe") - assert instance.query( - "select role_name from system.current_roles ORDER BY role_name", - user="johndoe", - password="qwertz", - ) == TSV([["role_1"], ["role_2"], ["role_3"]]) + assert ( + instance.query( + "select role_name from system.current_roles ORDER BY role_name", + user="johndoe", + password="qwertz", + ) + == TSV([["role_1"], ["role_2"], ["role_3"]]) + ) diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 7a69d07633c..61e76c0dc97 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -854,14 +854,14 @@ def test_types(started_cluster): result = cursor.fetchall()[0] expected = [ - ("Int8_column", -(2**7)), - ("UInt8_column", 2**8 - 1), - ("Int16_column", -(2**15)), - ("UInt16_column", 2**16 - 1), - ("Int32_column", -(2**31)), - ("UInt32_column", 2**32 - 1), - ("Int64_column", -(2**63)), - ("UInt64_column", 2**64 - 1), + ("Int8_column", -(2 ** 7)), + ("UInt8_column", 2 ** 8 - 1), + ("Int16_column", -(2 ** 15)), + ("UInt16_column", 2 ** 16 - 1), + ("Int32_column", -(2 ** 31)), + ("UInt32_column", 2 ** 32 - 1), + ("Int64_column", -(2 ** 63)), + ("UInt64_column", 2 ** 64 - 1), ("String_column", "тест"), ("FixedString_column", "тест"), ("Float32_column", 1.5), diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index 054418a8ba9..d39787f8924 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -561,9 +561,7 @@ def test_make_clone_in_detached(started_cluster): ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] ) assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") - assert [ - "broken_all_0_0_0", - ] == sorted( + assert ["broken_all_0_0_0",] == sorted( instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") ) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 1fc3fe37044..16425c9bd9e 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -506,12 +506,9 @@ def test_alters_from_different_replicas(started_cluster): dummy_node.stop_clickhouse(kill=True) settings = {"distributed_ddl_task_timeout": 5} - assert ( - "There are 1 unfinished hosts (0 of them are currently executing the task" - in competing_node.query_and_get_error( - "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", - settings=settings, - ) + assert "There are 1 unfinished hosts (0 of them are currently executing the task" in competing_node.query_and_get_error( + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", + settings=settings, ) settings = { "distributed_ddl_task_timeout": 5, diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py index 647626d8014..5a315707efb 100644 --- a/tests/integration/test_replicated_database_cluster_groups/test.py +++ b/tests/integration/test_replicated_database_cluster_groups/test.py @@ -95,12 +95,9 @@ def test_cluster_groups(started_cluster): 
# Exception main_node_2.stop_clickhouse() settings = {"distributed_ddl_task_timeout": 5} - assert ( - "There are 1 unfinished hosts (0 of them are currently executing the task)" - in main_node_1.query_and_get_error( - "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", - settings=settings, - ) + assert "There are 1 unfinished hosts (0 of them are currently executing the task)" in main_node_1.query_and_get_error( + "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", + settings=settings, ) # 3. After start both groups are synced diff --git a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py index 5ef781bdc9e..3632fa15d8a 100644 --- a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py +++ b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py @@ -9,7 +9,7 @@ import time def gen_n_digit_number(n): assert 0 < n < 19 - return random.randint(10 ** (n - 1), 10**n - 1) + return random.randint(10 ** (n - 1), 10 ** n - 1) sum_in_4_column = 0 diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 2549cb0d473..e941356261a 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -553,16 +553,13 @@ def test_multipart(started_cluster, maybe_auth, positive): assert csv_data == get_s3_file_content(started_cluster, bucket, filename) # select uploaded data from many threads - select_query = ( - "select sum(column1), sum(column2), sum(column3) " - "from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format( - host=started_cluster.minio_redirect_host, - port=started_cluster.minio_redirect_port, - bucket=bucket, - filename=filename, - auth=maybe_auth, - table_format=table_format, - ) + select_query = "select sum(column1), sum(column2), sum(column3) " "from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format( + host=started_cluster.minio_redirect_host, + port=started_cluster.minio_redirect_port, + bucket=bucket, + filename=filename, + auth=maybe_auth, + table_format=table_format, ) try: select_result = run_query( diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index 7ff7a871413..771df49cbac 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -79,15 +79,21 @@ def test_table_function_url_access_rights(): f"SELECT * FROM url('http://nginx:80/test_1', 'TSV')", user="u1" ) - assert node1.query( - f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", - user="u1", - ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) + assert ( + node1.query( + f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", + user="u1", + ) + == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) + ) - assert node1.query( - f"DESCRIBE TABLE url('http://nginx:80/not-exist', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", - user="u1", - ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) + assert ( + node1.query( + f"DESCRIBE TABLE url('http://nginx:80/not-exist', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", + user="u1", + ) + == 
TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) + ) expected_error = "necessary to have the grant URL ON *.*" assert expected_error in node1.query_and_get_error( diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index 6dbe6c891f2..bacb0eb500d 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -204,36 +204,33 @@ def test_mutation_simple(started_cluster, replicated): sleep_time=0.1, ) - assert ( - split_tsv( - node_check.query( - """ + assert split_tsv( + node_check.query( + """ SELECT database, table, num_parts, source_part_names, source_part_paths, result_part_name, result_part_path, partition_id, is_mutation FROM system.merges WHERE table = '{name}' """.format( - name=table_name - ) + name=table_name ) ) - == [ - [ - db_name, - table_name, - "1", - "['{}']".format(part), - "['{clickhouse}/{table_path}/{}/']".format( - part, clickhouse=clickhouse_path, table_path=table_path - ), - result_part, - "{clickhouse}/{table_path}/{}/".format( - result_part, clickhouse=clickhouse_path, table_path=table_path - ), - "all", - "1", - ], - ] - ) + ) == [ + [ + db_name, + table_name, + "1", + "['{}']".format(part), + "['{clickhouse}/{table_path}/{}/']".format( + part, clickhouse=clickhouse_path, table_path=table_path + ), + result_part, + "{clickhouse}/{table_path}/{}/".format( + result_part, clickhouse=clickhouse_path, table_path=table_path + ), + "all", + "1", + ], + ] t.join() assert ( diff --git a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py index 6218047af3c..9bf7817c7d3 100644 --- a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py +++ b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py @@ -8,16 +8,17 @@ from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database + # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 \x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03"\x8e\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 \x01(\t\x12:\n\x08settings\x18\x03 
\x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 \x01(\x05\x12"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 \x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 \x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 \x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 \x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 \x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 
\x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00(\x01\x30\x01\x62\x06proto3' +) - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc\")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 \x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel\"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03\"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03\"\x8e\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 \x01(\t\x12:\n\x08settings\x18\x03 \x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 \x01(\x05\x12\"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 \x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 
\x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 \x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t\"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04\"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 \x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04\"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t\"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 \x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 \x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x30\x01\x62\x06proto3') - -_LOGSLEVEL = DESCRIPTOR.enum_types_by_name['LogsLevel'] +_LOGSLEVEL = DESCRIPTOR.enum_types_by_name["LogsLevel"] LogsLevel = enum_type_wrapper.EnumTypeWrapper(_LOGSLEVEL) LOG_NONE = 0 LOG_FATAL = 1 @@ -30,134 +31,180 @@ LOG_DEBUG = 7 LOG_TRACE = 8 -_NAMEANDTYPE = DESCRIPTOR.message_types_by_name['NameAndType'] -_EXTERNALTABLE = DESCRIPTOR.message_types_by_name['ExternalTable'] -_EXTERNALTABLE_SETTINGSENTRY = _EXTERNALTABLE.nested_types_by_name['SettingsEntry'] -_OBSOLETETRANSPORTCOMPRESSION = DESCRIPTOR.message_types_by_name['ObsoleteTransportCompression'] -_QUERYINFO = DESCRIPTOR.message_types_by_name['QueryInfo'] -_QUERYINFO_SETTINGSENTRY = _QUERYINFO.nested_types_by_name['SettingsEntry'] -_LOGENTRY = DESCRIPTOR.message_types_by_name['LogEntry'] -_PROGRESS = DESCRIPTOR.message_types_by_name['Progress'] -_STATS = DESCRIPTOR.message_types_by_name['Stats'] -_EXCEPTION = DESCRIPTOR.message_types_by_name['Exception'] -_RESULT = DESCRIPTOR.message_types_by_name['Result'] -_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM = _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionAlgorithm'] -_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL = _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionLevel'] -NameAndType = 
_reflection.GeneratedProtocolMessageType('NameAndType', (_message.Message,), { - 'DESCRIPTOR' : _NAMEANDTYPE, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.NameAndType) - }) +_NAMEANDTYPE = DESCRIPTOR.message_types_by_name["NameAndType"] +_EXTERNALTABLE = DESCRIPTOR.message_types_by_name["ExternalTable"] +_EXTERNALTABLE_SETTINGSENTRY = _EXTERNALTABLE.nested_types_by_name["SettingsEntry"] +_OBSOLETETRANSPORTCOMPRESSION = DESCRIPTOR.message_types_by_name[ + "ObsoleteTransportCompression" +] +_QUERYINFO = DESCRIPTOR.message_types_by_name["QueryInfo"] +_QUERYINFO_SETTINGSENTRY = _QUERYINFO.nested_types_by_name["SettingsEntry"] +_LOGENTRY = DESCRIPTOR.message_types_by_name["LogEntry"] +_PROGRESS = DESCRIPTOR.message_types_by_name["Progress"] +_STATS = DESCRIPTOR.message_types_by_name["Stats"] +_EXCEPTION = DESCRIPTOR.message_types_by_name["Exception"] +_RESULT = DESCRIPTOR.message_types_by_name["Result"] +_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM = ( + _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name["CompressionAlgorithm"] +) +_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL = ( + _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name["CompressionLevel"] +) +NameAndType = _reflection.GeneratedProtocolMessageType( + "NameAndType", + (_message.Message,), + { + "DESCRIPTOR": _NAMEANDTYPE, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.NameAndType) + }, +) _sym_db.RegisterMessage(NameAndType) -ExternalTable = _reflection.GeneratedProtocolMessageType('ExternalTable', (_message.Message,), { - - 'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), { - 'DESCRIPTOR' : _EXTERNALTABLE_SETTINGSENTRY, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable.SettingsEntry) - }) - , - 'DESCRIPTOR' : _EXTERNALTABLE, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable) - }) +ExternalTable = _reflection.GeneratedProtocolMessageType( + "ExternalTable", + (_message.Message,), + { + "SettingsEntry": _reflection.GeneratedProtocolMessageType( + "SettingsEntry", + (_message.Message,), + { + "DESCRIPTOR": _EXTERNALTABLE_SETTINGSENTRY, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable.SettingsEntry) + }, + ), + "DESCRIPTOR": _EXTERNALTABLE, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable) + }, +) _sym_db.RegisterMessage(ExternalTable) _sym_db.RegisterMessage(ExternalTable.SettingsEntry) -ObsoleteTransportCompression = _reflection.GeneratedProtocolMessageType('ObsoleteTransportCompression', (_message.Message,), { - 'DESCRIPTOR' : _OBSOLETETRANSPORTCOMPRESSION, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ObsoleteTransportCompression) - }) +ObsoleteTransportCompression = _reflection.GeneratedProtocolMessageType( + "ObsoleteTransportCompression", + (_message.Message,), + { + "DESCRIPTOR": _OBSOLETETRANSPORTCOMPRESSION, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.ObsoleteTransportCompression) + }, +) _sym_db.RegisterMessage(ObsoleteTransportCompression) -QueryInfo = _reflection.GeneratedProtocolMessageType('QueryInfo', (_message.Message,), { - - 'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), { - 
'DESCRIPTOR' : _QUERYINFO_SETTINGSENTRY, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo.SettingsEntry) - }) - , - 'DESCRIPTOR' : _QUERYINFO, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo) - }) +QueryInfo = _reflection.GeneratedProtocolMessageType( + "QueryInfo", + (_message.Message,), + { + "SettingsEntry": _reflection.GeneratedProtocolMessageType( + "SettingsEntry", + (_message.Message,), + { + "DESCRIPTOR": _QUERYINFO_SETTINGSENTRY, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo.SettingsEntry) + }, + ), + "DESCRIPTOR": _QUERYINFO, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo) + }, +) _sym_db.RegisterMessage(QueryInfo) _sym_db.RegisterMessage(QueryInfo.SettingsEntry) -LogEntry = _reflection.GeneratedProtocolMessageType('LogEntry', (_message.Message,), { - 'DESCRIPTOR' : _LOGENTRY, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.LogEntry) - }) +LogEntry = _reflection.GeneratedProtocolMessageType( + "LogEntry", + (_message.Message,), + { + "DESCRIPTOR": _LOGENTRY, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.LogEntry) + }, +) _sym_db.RegisterMessage(LogEntry) -Progress = _reflection.GeneratedProtocolMessageType('Progress', (_message.Message,), { - 'DESCRIPTOR' : _PROGRESS, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Progress) - }) +Progress = _reflection.GeneratedProtocolMessageType( + "Progress", + (_message.Message,), + { + "DESCRIPTOR": _PROGRESS, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.Progress) + }, +) _sym_db.RegisterMessage(Progress) -Stats = _reflection.GeneratedProtocolMessageType('Stats', (_message.Message,), { - 'DESCRIPTOR' : _STATS, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Stats) - }) +Stats = _reflection.GeneratedProtocolMessageType( + "Stats", + (_message.Message,), + { + "DESCRIPTOR": _STATS, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.Stats) + }, +) _sym_db.RegisterMessage(Stats) -Exception = _reflection.GeneratedProtocolMessageType('Exception', (_message.Message,), { - 'DESCRIPTOR' : _EXCEPTION, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Exception) - }) +Exception = _reflection.GeneratedProtocolMessageType( + "Exception", + (_message.Message,), + { + "DESCRIPTOR": _EXCEPTION, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.Exception) + }, +) _sym_db.RegisterMessage(Exception) -Result = _reflection.GeneratedProtocolMessageType('Result', (_message.Message,), { - 'DESCRIPTOR' : _RESULT, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Result) - }) +Result = _reflection.GeneratedProtocolMessageType( + "Result", + (_message.Message,), + { + "DESCRIPTOR": _RESULT, + "__module__": "clickhouse_grpc_pb2" + # @@protoc_insertion_point(class_scope:clickhouse.grpc.Result) + }, +) _sym_db.RegisterMessage(Result) -_CLICKHOUSE = DESCRIPTOR.services_by_name['ClickHouse'] +_CLICKHOUSE = DESCRIPTOR.services_by_name["ClickHouse"] if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - 
_EXTERNALTABLE_SETTINGSENTRY._options = None - _EXTERNALTABLE_SETTINGSENTRY._serialized_options = b'8\001' - _QUERYINFO_SETTINGSENTRY._options = None - _QUERYINFO_SETTINGSENTRY._serialized_options = b'8\001' - _LOGSLEVEL._serialized_start=2363 - _LOGSLEVEL._serialized_end=2520 - _NAMEANDTYPE._serialized_start=42 - _NAMEANDTYPE._serialized_end=83 - _EXTERNALTABLE._serialized_start=86 - _EXTERNALTABLE._serialized_end=331 - _EXTERNALTABLE_SETTINGSENTRY._serialized_start=284 - _EXTERNALTABLE_SETTINGSENTRY._serialized_end=331 - _OBSOLETETRANSPORTCOMPRESSION._serialized_start=334 - _OBSOLETETRANSPORTCOMPRESSION._serialized_end=723 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_start=532 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_end=614 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_start=616 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_end=723 - _QUERYINFO._serialized_start=726 - _QUERYINFO._serialized_end=1508 - _QUERYINFO_SETTINGSENTRY._serialized_start=284 - _QUERYINFO_SETTINGSENTRY._serialized_end=331 - _LOGENTRY._serialized_start=1511 - _LOGENTRY._serialized_end=1672 - _PROGRESS._serialized_start=1674 - _PROGRESS._serialized_end=1796 - _STATS._serialized_start=1798 - _STATS._serialized_end=1910 - _EXCEPTION._serialized_start=1912 - _EXCEPTION._serialized_end=1994 - _RESULT._serialized_start=1997 - _RESULT._serialized_end=2360 - _CLICKHOUSE._serialized_start=2523 - _CLICKHOUSE._serialized_end=2870 + DESCRIPTOR._options = None + _EXTERNALTABLE_SETTINGSENTRY._options = None + _EXTERNALTABLE_SETTINGSENTRY._serialized_options = b"8\001" + _QUERYINFO_SETTINGSENTRY._options = None + _QUERYINFO_SETTINGSENTRY._serialized_options = b"8\001" + _LOGSLEVEL._serialized_start = 2363 + _LOGSLEVEL._serialized_end = 2520 + _NAMEANDTYPE._serialized_start = 42 + _NAMEANDTYPE._serialized_end = 83 + _EXTERNALTABLE._serialized_start = 86 + _EXTERNALTABLE._serialized_end = 331 + _EXTERNALTABLE_SETTINGSENTRY._serialized_start = 284 + _EXTERNALTABLE_SETTINGSENTRY._serialized_end = 331 + _OBSOLETETRANSPORTCOMPRESSION._serialized_start = 334 + _OBSOLETETRANSPORTCOMPRESSION._serialized_end = 723 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_start = 532 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_end = 614 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_start = 616 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_end = 723 + _QUERYINFO._serialized_start = 726 + _QUERYINFO._serialized_end = 1508 + _QUERYINFO_SETTINGSENTRY._serialized_start = 284 + _QUERYINFO_SETTINGSENTRY._serialized_end = 331 + _LOGENTRY._serialized_start = 1511 + _LOGENTRY._serialized_end = 1672 + _PROGRESS._serialized_start = 1674 + _PROGRESS._serialized_end = 1796 + _STATS._serialized_start = 1798 + _STATS._serialized_end = 1910 + _EXCEPTION._serialized_start = 1912 + _EXCEPTION._serialized_end = 1994 + _RESULT._serialized_start = 1997 + _RESULT._serialized_end = 2360 + _CLICKHOUSE._serialized_start = 2523 + _CLICKHOUSE._serialized_end = 2870 # @@protoc_insertion_point(module_scope) diff --git a/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py b/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py index 1c71218bbe5..25643a243b3 100644 --- a/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py +++ b/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py @@ -15,25 +15,25 @@ class ClickHouseStub(object): channel: A grpc.Channel. 
""" self.ExecuteQuery = channel.unary_unary( - '/clickhouse.grpc.ClickHouse/ExecuteQuery', - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + "/clickhouse.grpc.ClickHouse/ExecuteQuery", + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) self.ExecuteQueryWithStreamInput = channel.stream_unary( - '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput', - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput", + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) self.ExecuteQueryWithStreamOutput = channel.unary_stream( - '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput', - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput", + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) self.ExecuteQueryWithStreamIO = channel.stream_stream( - '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO', - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO", + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) class ClickHouseServicer(object): @@ -42,124 +42,173 @@ class ClickHouseServicer(object): def ExecuteQuery(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def ExecuteQueryWithStreamInput(self, request_iterator, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def ExecuteQueryWithStreamOutput(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def ExecuteQueryWithStreamIO(self, request_iterator, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def add_ClickHouseServicer_to_server(servicer, server): rpc_method_handlers = { - 'ExecuteQuery': grpc.unary_unary_rpc_method_handler( - servicer.ExecuteQuery, - 
request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), - 'ExecuteQueryWithStreamInput': grpc.stream_unary_rpc_method_handler( - servicer.ExecuteQueryWithStreamInput, - request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), - 'ExecuteQueryWithStreamOutput': grpc.unary_stream_rpc_method_handler( - servicer.ExecuteQueryWithStreamOutput, - request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), - 'ExecuteQueryWithStreamIO': grpc.stream_stream_rpc_method_handler( - servicer.ExecuteQueryWithStreamIO, - request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), + "ExecuteQuery": grpc.unary_unary_rpc_method_handler( + servicer.ExecuteQuery, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), + "ExecuteQueryWithStreamInput": grpc.stream_unary_rpc_method_handler( + servicer.ExecuteQueryWithStreamInput, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), + "ExecuteQueryWithStreamOutput": grpc.unary_stream_rpc_method_handler( + servicer.ExecuteQueryWithStreamOutput, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), + "ExecuteQueryWithStreamIO": grpc.stream_stream_rpc_method_handler( + servicer.ExecuteQueryWithStreamIO, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( - 'clickhouse.grpc.ClickHouse', rpc_method_handlers) + "clickhouse.grpc.ClickHouse", rpc_method_handlers + ) server.add_generic_rpc_handlers((generic_handler,)) - # This class is part of an EXPERIMENTAL API. +# This class is part of an EXPERIMENTAL API. 
class ClickHouse(object): """Missing associated documentation comment in .proto file.""" @staticmethod - def ExecuteQuery(request, + def ExecuteQuery( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/clickhouse.grpc.ClickHouse/ExecuteQuery', + "/clickhouse.grpc.ClickHouse/ExecuteQuery", clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def ExecuteQueryWithStreamInput(request_iterator, + def ExecuteQueryWithStreamInput( + request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.stream_unary( + request_iterator, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.stream_unary(request_iterator, target, '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput', + "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput", clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def ExecuteQueryWithStreamOutput(request, + def ExecuteQueryWithStreamOutput( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_stream( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput', + "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput", clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def ExecuteQueryWithStreamIO(request_iterator, + def ExecuteQueryWithStreamIO( + request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.stream_stream( + request_iterator, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - 
wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.stream_stream(request_iterator, target, '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO', + "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO", clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) From e42d10fa9ccf4296732941e9f1b333d692e83384 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 06:25:20 +0100 Subject: [PATCH 082/884] Revert "Obey Python's quirky formatter" This reverts commit 678a32cedee768b6c1a6748e96a0d103e853d8bc. --- tests/integration/ci-runner.py | 13 +- .../test_async_insert_memory/test.py | 2 +- tests/integration/test_check_table/test.py | 76 ++--- .../test_cluster_discovery/test.py | 2 +- .../test_ldap_external_user_directory/test.py | 26 +- tests/integration/test_mysql_protocol/test.py | 16 +- tests/integration/test_partition/test.py | 4 +- .../test_replicated_database/test.py | 9 +- .../test.py | 9 +- .../s3_mocks/unstable_server.py | 2 +- tests/integration/test_storage_s3/test.py | 17 +- tests/integration/test_storage_url/test.py | 22 +- tests/integration/test_system_merges/test.py | 45 +-- utils/grpc-client/pb2/clickhouse_grpc_pb2.py | 271 ++++++++---------- .../pb2/clickhouse_grpc_pb2_grpc.py | 237 ++++++--------- 15 files changed, 318 insertions(+), 433 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index d54ed2bb767..7c922e339fe 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -305,11 +305,14 @@ class ClickhouseIntegrationTestsRunner: def _pre_pull_images(self, repo_path): image_cmd = self._get_runner_image_cmd(repo_path) - cmd = "cd {repo_path}/tests/integration && " "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( - repo_path=repo_path, - runner_opts=self._get_runner_opts(), - image_cmd=image_cmd, - command=r""" echo Pre Pull finished """, + cmd = ( + "cd {repo_path}/tests/integration && " + "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( + repo_path=repo_path, + runner_opts=self._get_runner_opts(), + image_cmd=image_cmd, + command=r""" echo Pre Pull finished """, + ) ) for i in range(5): diff --git a/tests/integration/test_async_insert_memory/test.py b/tests/integration/test_async_insert_memory/test.py index f897007f7bb..5d2e5503680 100644 --- a/tests/integration/test_async_insert_memory/test.py +++ b/tests/integration/test_async_insert_memory/test.py @@ -43,7 +43,7 @@ def test_memory_usage(): response = node.get_query_request( "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user={}".format( - 30 * (2 ** 23) + 30 * (2**23) ), user="A", ) diff --git a/tests/integration/test_check_table/test.py b/tests/integration/test_check_table/test.py index ebf404e698b..021977fb6b6 100644 --- a/tests/integration/test_check_table/test.py +++ b/tests/integration/test_check_table/test.py @@ -95,25 +95,15 @@ def test_check_normal_table_corruption(started_cluster, merge_tree_settings): node1, "non_replicated_mt", "201902_1_1_0", database="default" ) - assert ( - node1.query( - "CHECK TABLE non_replicated_mt", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ) 
- .strip() - .split("\t")[0:2] - == ["201902_1_1_0", "0"] - ) + assert node1.query( + "CHECK TABLE non_replicated_mt", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"] - assert ( - node1.query( - "CHECK TABLE non_replicated_mt", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ) - .strip() - .split("\t")[0:2] - == ["201902_1_1_0", "0"] - ) + assert node1.query( + "CHECK TABLE non_replicated_mt", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"] node1.query( "INSERT INTO non_replicated_mt VALUES (toDate('2019-01-01'), 1, 10), (toDate('2019-01-01'), 2, 12)" @@ -133,15 +123,10 @@ def test_check_normal_table_corruption(started_cluster, merge_tree_settings): remove_checksums_on_disk(node1, "default", "non_replicated_mt", "201901_2_2_0") - assert ( - node1.query( - "CHECK TABLE non_replicated_mt PARTITION 201901", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ) - .strip() - .split("\t")[0:2] - == ["201901_2_2_0", "0"] - ) + assert node1.query( + "CHECK TABLE non_replicated_mt PARTITION 201901", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ).strip().split("\t")[0:2] == ["201901_2_2_0", "0"] @pytest.mark.parametrize("merge_tree_settings, zk_path_suffix", [("", "_0")]) @@ -209,15 +194,12 @@ def test_check_replicated_table_simple( == "201901_0_0_0\t1\t\n" ) - assert ( - sorted( - node2.query( - "CHECK TABLE replicated_mt", - settings={"check_query_single_value_result": 0}, - ).split("\n") - ) - == ["", "201901_0_0_0\t1\t", "201902_0_0_0\t1\t"] - ) + assert sorted( + node2.query( + "CHECK TABLE replicated_mt", + settings={"check_query_single_value_result": 0}, + ).split("\n") + ) == ["", "201901_0_0_0\t1\t", "201902_0_0_0\t1\t"] with pytest.raises(QueryRuntimeException) as exc: node2.query( @@ -291,13 +273,10 @@ def test_check_replicated_table_corruption( ) node1.query_with_retry("SYSTEM SYNC REPLICA replicated_mt_1") - assert ( - node1.query( - "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ) - == "{}\t1\t\n".format(part_name) - ) + assert node1.query( + "CHECK TABLE replicated_mt_1 PARTITION 201901", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ) == "{}\t1\t\n".format(part_name) assert node1.query("SELECT count() from replicated_mt_1") == "4\n" remove_part_from_disk(node2, "replicated_mt_1", part_name) @@ -309,13 +288,10 @@ def test_check_replicated_table_corruption( ) node1.query("SYSTEM SYNC REPLICA replicated_mt_1") - assert ( - node1.query( - "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0, "max_threads": 1}, - ) - == "{}\t1\t\n".format(part_name) - ) + assert node1.query( + "CHECK TABLE replicated_mt_1 PARTITION 201901", + settings={"check_query_single_value_result": 0, "max_threads": 1}, + ) == "{}\t1\t\n".format(part_name) assert node1.query("SELECT count() from replicated_mt_1") == "4\n" diff --git a/tests/integration/test_cluster_discovery/test.py b/tests/integration/test_cluster_discovery/test.py index a2e7e15b956..ad3deb5b142 100644 --- a/tests/integration/test_cluster_discovery/test.py +++ b/tests/integration/test_cluster_discovery/test.py @@ -61,7 +61,7 @@ def check_on_cluster( print(f"Retry {retry}/{retries} unsuccessful, result: {node_results}") if retry != retries: - time.sleep(2 ** retry) + time.sleep(2**retry) else: msg 
= msg or f"Wrong '{what}' result" raise Exception( diff --git a/tests/integration/test_ldap_external_user_directory/test.py b/tests/integration/test_ldap_external_user_directory/test.py index c9642c293ee..39753794d63 100644 --- a/tests/integration/test_ldap_external_user_directory/test.py +++ b/tests/integration/test_ldap_external_user_directory/test.py @@ -76,14 +76,11 @@ def test_role_mapping(ldap_cluster): "select currentUser()", user="johndoe", password="qwertz" ) == TSV([["johndoe"]]) - assert ( - instance.query( - "select role_name from system.current_roles ORDER BY role_name", - user="johndoe", - password="qwertz", - ) - == TSV([["role_1"], ["role_2"]]) - ) + assert instance.query( + "select role_name from system.current_roles ORDER BY role_name", + user="johndoe", + password="qwertz", + ) == TSV([["role_1"], ["role_2"]]) instance.query("CREATE ROLE role_3") add_ldap_group(ldap_cluster, group_cn="clickhouse-role_3", member_cn="johndoe") @@ -91,11 +88,8 @@ def test_role_mapping(ldap_cluster): # See https://github.com/ClickHouse/ClickHouse/issues/54318 add_ldap_group(ldap_cluster, group_cn="clickhouse-role_4", member_cn="johndoe") - assert ( - instance.query( - "select role_name from system.current_roles ORDER BY role_name", - user="johndoe", - password="qwertz", - ) - == TSV([["role_1"], ["role_2"], ["role_3"]]) - ) + assert instance.query( + "select role_name from system.current_roles ORDER BY role_name", + user="johndoe", + password="qwertz", + ) == TSV([["role_1"], ["role_2"], ["role_3"]]) diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 61e76c0dc97..7a69d07633c 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -854,14 +854,14 @@ def test_types(started_cluster): result = cursor.fetchall()[0] expected = [ - ("Int8_column", -(2 ** 7)), - ("UInt8_column", 2 ** 8 - 1), - ("Int16_column", -(2 ** 15)), - ("UInt16_column", 2 ** 16 - 1), - ("Int32_column", -(2 ** 31)), - ("UInt32_column", 2 ** 32 - 1), - ("Int64_column", -(2 ** 63)), - ("UInt64_column", 2 ** 64 - 1), + ("Int8_column", -(2**7)), + ("UInt8_column", 2**8 - 1), + ("Int16_column", -(2**15)), + ("UInt16_column", 2**16 - 1), + ("Int32_column", -(2**31)), + ("UInt32_column", 2**32 - 1), + ("Int64_column", -(2**63)), + ("UInt64_column", 2**64 - 1), ("String_column", "тест"), ("FixedString_column", "тест"), ("Float32_column", 1.5), diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index d39787f8924..054418a8ba9 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -561,7 +561,9 @@ def test_make_clone_in_detached(started_cluster): ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] ) assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") - assert ["broken_all_0_0_0",] == sorted( + assert [ + "broken_all_0_0_0", + ] == sorted( instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") ) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 16425c9bd9e..1fc3fe37044 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -506,9 +506,12 @@ def test_alters_from_different_replicas(started_cluster): dummy_node.stop_clickhouse(kill=True) settings = {"distributed_ddl_task_timeout": 5} - assert "There are 1 unfinished hosts (0 of them are 
currently executing the task" in competing_node.query_and_get_error( - "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", - settings=settings, + assert ( + "There are 1 unfinished hosts (0 of them are currently executing the task" + in competing_node.query_and_get_error( + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", + settings=settings, + ) ) settings = { "distributed_ddl_task_timeout": 5, diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py index 5a315707efb..647626d8014 100644 --- a/tests/integration/test_replicated_database_cluster_groups/test.py +++ b/tests/integration/test_replicated_database_cluster_groups/test.py @@ -95,9 +95,12 @@ def test_cluster_groups(started_cluster): # Exception main_node_2.stop_clickhouse() settings = {"distributed_ddl_task_timeout": 5} - assert "There are 1 unfinished hosts (0 of them are currently executing the task)" in main_node_1.query_and_get_error( - "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", - settings=settings, + assert ( + "There are 1 unfinished hosts (0 of them are currently executing the task)" + in main_node_1.query_and_get_error( + "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", + settings=settings, + ) ) # 3. After start both groups are synced diff --git a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py index 3632fa15d8a..5ef781bdc9e 100644 --- a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py +++ b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py @@ -9,7 +9,7 @@ import time def gen_n_digit_number(n): assert 0 < n < 19 - return random.randint(10 ** (n - 1), 10 ** n - 1) + return random.randint(10 ** (n - 1), 10**n - 1) sum_in_4_column = 0 diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index e941356261a..2549cb0d473 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -553,13 +553,16 @@ def test_multipart(started_cluster, maybe_auth, positive): assert csv_data == get_s3_file_content(started_cluster, bucket, filename) # select uploaded data from many threads - select_query = "select sum(column1), sum(column2), sum(column3) " "from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format( - host=started_cluster.minio_redirect_host, - port=started_cluster.minio_redirect_port, - bucket=bucket, - filename=filename, - auth=maybe_auth, - table_format=table_format, + select_query = ( + "select sum(column1), sum(column2), sum(column3) " + "from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format( + host=started_cluster.minio_redirect_host, + port=started_cluster.minio_redirect_port, + bucket=bucket, + filename=filename, + auth=maybe_auth, + table_format=table_format, + ) ) try: select_result = run_query( diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index 771df49cbac..7ff7a871413 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -79,21 +79,15 @@ def test_table_function_url_access_rights(): f"SELECT * FROM url('http://nginx:80/test_1', 'TSV')", user="u1" 
) - assert ( - node1.query( - f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", - user="u1", - ) - == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) - ) + assert node1.query( + f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", + user="u1", + ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) - assert ( - node1.query( - f"DESCRIBE TABLE url('http://nginx:80/not-exist', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", - user="u1", - ) - == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) - ) + assert node1.query( + f"DESCRIBE TABLE url('http://nginx:80/not-exist', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", + user="u1", + ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) expected_error = "necessary to have the grant URL ON *.*" assert expected_error in node1.query_and_get_error( diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index bacb0eb500d..6dbe6c891f2 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -204,33 +204,36 @@ def test_mutation_simple(started_cluster, replicated): sleep_time=0.1, ) - assert split_tsv( - node_check.query( - """ + assert ( + split_tsv( + node_check.query( + """ SELECT database, table, num_parts, source_part_names, source_part_paths, result_part_name, result_part_path, partition_id, is_mutation FROM system.merges WHERE table = '{name}' """.format( - name=table_name + name=table_name + ) ) ) - ) == [ - [ - db_name, - table_name, - "1", - "['{}']".format(part), - "['{clickhouse}/{table_path}/{}/']".format( - part, clickhouse=clickhouse_path, table_path=table_path - ), - result_part, - "{clickhouse}/{table_path}/{}/".format( - result_part, clickhouse=clickhouse_path, table_path=table_path - ), - "all", - "1", - ], - ] + == [ + [ + db_name, + table_name, + "1", + "['{}']".format(part), + "['{clickhouse}/{table_path}/{}/']".format( + part, clickhouse=clickhouse_path, table_path=table_path + ), + result_part, + "{clickhouse}/{table_path}/{}/".format( + result_part, clickhouse=clickhouse_path, table_path=table_path + ), + "all", + "1", + ], + ] + ) t.join() assert ( diff --git a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py index 9bf7817c7d3..6218047af3c 100644 --- a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py +++ b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py @@ -8,17 +8,16 @@ from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database - # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 
\x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03"\x8e\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 \x01(\t\x12:\n\x08settings\x18\x03 \x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 \x01(\x05\x12"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 \x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 \x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 \x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 \x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 
\x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 \x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result"\x00(\x01\x30\x01\x62\x06proto3' -) -_LOGSLEVEL = DESCRIPTOR.enum_types_by_name["LogsLevel"] + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc\")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 \x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel\"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03\"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03\"\x8e\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 \x01(\t\x12:\n\x08settings\x18\x03 \x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 \x01(\x05\x12\"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 
\x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 \x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 \x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t\"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04\"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 \x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04\"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t\"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 \x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 \x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x30\x01\x62\x06proto3') + +_LOGSLEVEL = DESCRIPTOR.enum_types_by_name['LogsLevel'] LogsLevel = enum_type_wrapper.EnumTypeWrapper(_LOGSLEVEL) LOG_NONE = 0 LOG_FATAL = 1 @@ -31,180 +30,134 @@ LOG_DEBUG = 7 LOG_TRACE = 8 -_NAMEANDTYPE = DESCRIPTOR.message_types_by_name["NameAndType"] -_EXTERNALTABLE = DESCRIPTOR.message_types_by_name["ExternalTable"] -_EXTERNALTABLE_SETTINGSENTRY = _EXTERNALTABLE.nested_types_by_name["SettingsEntry"] -_OBSOLETETRANSPORTCOMPRESSION = DESCRIPTOR.message_types_by_name[ - "ObsoleteTransportCompression" -] -_QUERYINFO = DESCRIPTOR.message_types_by_name["QueryInfo"] -_QUERYINFO_SETTINGSENTRY = _QUERYINFO.nested_types_by_name["SettingsEntry"] -_LOGENTRY = DESCRIPTOR.message_types_by_name["LogEntry"] -_PROGRESS = DESCRIPTOR.message_types_by_name["Progress"] -_STATS = DESCRIPTOR.message_types_by_name["Stats"] -_EXCEPTION = DESCRIPTOR.message_types_by_name["Exception"] -_RESULT = DESCRIPTOR.message_types_by_name["Result"] 
-_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM = ( - _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name["CompressionAlgorithm"] -) -_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL = ( - _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name["CompressionLevel"] -) -NameAndType = _reflection.GeneratedProtocolMessageType( - "NameAndType", - (_message.Message,), - { - "DESCRIPTOR": _NAMEANDTYPE, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.NameAndType) - }, -) +_NAMEANDTYPE = DESCRIPTOR.message_types_by_name['NameAndType'] +_EXTERNALTABLE = DESCRIPTOR.message_types_by_name['ExternalTable'] +_EXTERNALTABLE_SETTINGSENTRY = _EXTERNALTABLE.nested_types_by_name['SettingsEntry'] +_OBSOLETETRANSPORTCOMPRESSION = DESCRIPTOR.message_types_by_name['ObsoleteTransportCompression'] +_QUERYINFO = DESCRIPTOR.message_types_by_name['QueryInfo'] +_QUERYINFO_SETTINGSENTRY = _QUERYINFO.nested_types_by_name['SettingsEntry'] +_LOGENTRY = DESCRIPTOR.message_types_by_name['LogEntry'] +_PROGRESS = DESCRIPTOR.message_types_by_name['Progress'] +_STATS = DESCRIPTOR.message_types_by_name['Stats'] +_EXCEPTION = DESCRIPTOR.message_types_by_name['Exception'] +_RESULT = DESCRIPTOR.message_types_by_name['Result'] +_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM = _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionAlgorithm'] +_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL = _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionLevel'] +NameAndType = _reflection.GeneratedProtocolMessageType('NameAndType', (_message.Message,), { + 'DESCRIPTOR' : _NAMEANDTYPE, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.NameAndType) + }) _sym_db.RegisterMessage(NameAndType) -ExternalTable = _reflection.GeneratedProtocolMessageType( - "ExternalTable", - (_message.Message,), - { - "SettingsEntry": _reflection.GeneratedProtocolMessageType( - "SettingsEntry", - (_message.Message,), - { - "DESCRIPTOR": _EXTERNALTABLE_SETTINGSENTRY, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable.SettingsEntry) - }, - ), - "DESCRIPTOR": _EXTERNALTABLE, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable) - }, -) +ExternalTable = _reflection.GeneratedProtocolMessageType('ExternalTable', (_message.Message,), { + + 'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), { + 'DESCRIPTOR' : _EXTERNALTABLE_SETTINGSENTRY, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable.SettingsEntry) + }) + , + 'DESCRIPTOR' : _EXTERNALTABLE, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable) + }) _sym_db.RegisterMessage(ExternalTable) _sym_db.RegisterMessage(ExternalTable.SettingsEntry) -ObsoleteTransportCompression = _reflection.GeneratedProtocolMessageType( - "ObsoleteTransportCompression", - (_message.Message,), - { - "DESCRIPTOR": _OBSOLETETRANSPORTCOMPRESSION, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ObsoleteTransportCompression) - }, -) +ObsoleteTransportCompression = _reflection.GeneratedProtocolMessageType('ObsoleteTransportCompression', (_message.Message,), { + 'DESCRIPTOR' : _OBSOLETETRANSPORTCOMPRESSION, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.ObsoleteTransportCompression) + }) 
_sym_db.RegisterMessage(ObsoleteTransportCompression) -QueryInfo = _reflection.GeneratedProtocolMessageType( - "QueryInfo", - (_message.Message,), - { - "SettingsEntry": _reflection.GeneratedProtocolMessageType( - "SettingsEntry", - (_message.Message,), - { - "DESCRIPTOR": _QUERYINFO_SETTINGSENTRY, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo.SettingsEntry) - }, - ), - "DESCRIPTOR": _QUERYINFO, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo) - }, -) +QueryInfo = _reflection.GeneratedProtocolMessageType('QueryInfo', (_message.Message,), { + + 'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), { + 'DESCRIPTOR' : _QUERYINFO_SETTINGSENTRY, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo.SettingsEntry) + }) + , + 'DESCRIPTOR' : _QUERYINFO, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo) + }) _sym_db.RegisterMessage(QueryInfo) _sym_db.RegisterMessage(QueryInfo.SettingsEntry) -LogEntry = _reflection.GeneratedProtocolMessageType( - "LogEntry", - (_message.Message,), - { - "DESCRIPTOR": _LOGENTRY, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.LogEntry) - }, -) +LogEntry = _reflection.GeneratedProtocolMessageType('LogEntry', (_message.Message,), { + 'DESCRIPTOR' : _LOGENTRY, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.LogEntry) + }) _sym_db.RegisterMessage(LogEntry) -Progress = _reflection.GeneratedProtocolMessageType( - "Progress", - (_message.Message,), - { - "DESCRIPTOR": _PROGRESS, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Progress) - }, -) +Progress = _reflection.GeneratedProtocolMessageType('Progress', (_message.Message,), { + 'DESCRIPTOR' : _PROGRESS, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.Progress) + }) _sym_db.RegisterMessage(Progress) -Stats = _reflection.GeneratedProtocolMessageType( - "Stats", - (_message.Message,), - { - "DESCRIPTOR": _STATS, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Stats) - }, -) +Stats = _reflection.GeneratedProtocolMessageType('Stats', (_message.Message,), { + 'DESCRIPTOR' : _STATS, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.Stats) + }) _sym_db.RegisterMessage(Stats) -Exception = _reflection.GeneratedProtocolMessageType( - "Exception", - (_message.Message,), - { - "DESCRIPTOR": _EXCEPTION, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Exception) - }, -) +Exception = _reflection.GeneratedProtocolMessageType('Exception', (_message.Message,), { + 'DESCRIPTOR' : _EXCEPTION, + '__module__' : 'clickhouse_grpc_pb2' + # @@protoc_insertion_point(class_scope:clickhouse.grpc.Exception) + }) _sym_db.RegisterMessage(Exception) -Result = _reflection.GeneratedProtocolMessageType( - "Result", - (_message.Message,), - { - "DESCRIPTOR": _RESULT, - "__module__": "clickhouse_grpc_pb2" - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Result) - }, -) +Result = _reflection.GeneratedProtocolMessageType('Result', (_message.Message,), { + 'DESCRIPTOR' : _RESULT, + '__module__' : 'clickhouse_grpc_pb2' + # 
@@protoc_insertion_point(class_scope:clickhouse.grpc.Result) + }) _sym_db.RegisterMessage(Result) -_CLICKHOUSE = DESCRIPTOR.services_by_name["ClickHouse"] +_CLICKHOUSE = DESCRIPTOR.services_by_name['ClickHouse'] if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - _EXTERNALTABLE_SETTINGSENTRY._options = None - _EXTERNALTABLE_SETTINGSENTRY._serialized_options = b"8\001" - _QUERYINFO_SETTINGSENTRY._options = None - _QUERYINFO_SETTINGSENTRY._serialized_options = b"8\001" - _LOGSLEVEL._serialized_start = 2363 - _LOGSLEVEL._serialized_end = 2520 - _NAMEANDTYPE._serialized_start = 42 - _NAMEANDTYPE._serialized_end = 83 - _EXTERNALTABLE._serialized_start = 86 - _EXTERNALTABLE._serialized_end = 331 - _EXTERNALTABLE_SETTINGSENTRY._serialized_start = 284 - _EXTERNALTABLE_SETTINGSENTRY._serialized_end = 331 - _OBSOLETETRANSPORTCOMPRESSION._serialized_start = 334 - _OBSOLETETRANSPORTCOMPRESSION._serialized_end = 723 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_start = 532 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_end = 614 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_start = 616 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_end = 723 - _QUERYINFO._serialized_start = 726 - _QUERYINFO._serialized_end = 1508 - _QUERYINFO_SETTINGSENTRY._serialized_start = 284 - _QUERYINFO_SETTINGSENTRY._serialized_end = 331 - _LOGENTRY._serialized_start = 1511 - _LOGENTRY._serialized_end = 1672 - _PROGRESS._serialized_start = 1674 - _PROGRESS._serialized_end = 1796 - _STATS._serialized_start = 1798 - _STATS._serialized_end = 1910 - _EXCEPTION._serialized_start = 1912 - _EXCEPTION._serialized_end = 1994 - _RESULT._serialized_start = 1997 - _RESULT._serialized_end = 2360 - _CLICKHOUSE._serialized_start = 2523 - _CLICKHOUSE._serialized_end = 2870 + DESCRIPTOR._options = None + _EXTERNALTABLE_SETTINGSENTRY._options = None + _EXTERNALTABLE_SETTINGSENTRY._serialized_options = b'8\001' + _QUERYINFO_SETTINGSENTRY._options = None + _QUERYINFO_SETTINGSENTRY._serialized_options = b'8\001' + _LOGSLEVEL._serialized_start=2363 + _LOGSLEVEL._serialized_end=2520 + _NAMEANDTYPE._serialized_start=42 + _NAMEANDTYPE._serialized_end=83 + _EXTERNALTABLE._serialized_start=86 + _EXTERNALTABLE._serialized_end=331 + _EXTERNALTABLE_SETTINGSENTRY._serialized_start=284 + _EXTERNALTABLE_SETTINGSENTRY._serialized_end=331 + _OBSOLETETRANSPORTCOMPRESSION._serialized_start=334 + _OBSOLETETRANSPORTCOMPRESSION._serialized_end=723 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_start=532 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_end=614 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_start=616 + _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_end=723 + _QUERYINFO._serialized_start=726 + _QUERYINFO._serialized_end=1508 + _QUERYINFO_SETTINGSENTRY._serialized_start=284 + _QUERYINFO_SETTINGSENTRY._serialized_end=331 + _LOGENTRY._serialized_start=1511 + _LOGENTRY._serialized_end=1672 + _PROGRESS._serialized_start=1674 + _PROGRESS._serialized_end=1796 + _STATS._serialized_start=1798 + _STATS._serialized_end=1910 + _EXCEPTION._serialized_start=1912 + _EXCEPTION._serialized_end=1994 + _RESULT._serialized_start=1997 + _RESULT._serialized_end=2360 + _CLICKHOUSE._serialized_start=2523 + _CLICKHOUSE._serialized_end=2870 # @@protoc_insertion_point(module_scope) diff --git a/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py b/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py index 25643a243b3..1c71218bbe5 100644 
--- a/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py +++ b/utils/grpc-client/pb2/clickhouse_grpc_pb2_grpc.py @@ -15,25 +15,25 @@ class ClickHouseStub(object): channel: A grpc.Channel. """ self.ExecuteQuery = channel.unary_unary( - "/clickhouse.grpc.ClickHouse/ExecuteQuery", - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + '/clickhouse.grpc.ClickHouse/ExecuteQuery', + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) self.ExecuteQueryWithStreamInput = channel.stream_unary( - "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput", - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput', + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) self.ExecuteQueryWithStreamOutput = channel.unary_stream( - "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput", - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput', + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) self.ExecuteQueryWithStreamIO = channel.stream_stream( - "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO", - request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, - response_deserializer=clickhouse__grpc__pb2.Result.FromString, - ) + '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO', + request_serializer=clickhouse__grpc__pb2.QueryInfo.SerializeToString, + response_deserializer=clickhouse__grpc__pb2.Result.FromString, + ) class ClickHouseServicer(object): @@ -42,173 +42,124 @@ class ClickHouseServicer(object): def ExecuteQuery(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') def ExecuteQueryWithStreamInput(self, request_iterator, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') def ExecuteQueryWithStreamOutput(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') def ExecuteQueryWithStreamIO(self, request_iterator, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") + context.set_details('Method not implemented!') + raise 
NotImplementedError('Method not implemented!') def add_ClickHouseServicer_to_server(servicer, server): rpc_method_handlers = { - "ExecuteQuery": grpc.unary_unary_rpc_method_handler( - servicer.ExecuteQuery, - request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), - "ExecuteQueryWithStreamInput": grpc.stream_unary_rpc_method_handler( - servicer.ExecuteQueryWithStreamInput, - request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), - "ExecuteQueryWithStreamOutput": grpc.unary_stream_rpc_method_handler( - servicer.ExecuteQueryWithStreamOutput, - request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), - "ExecuteQueryWithStreamIO": grpc.stream_stream_rpc_method_handler( - servicer.ExecuteQueryWithStreamIO, - request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, - response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, - ), + 'ExecuteQuery': grpc.unary_unary_rpc_method_handler( + servicer.ExecuteQuery, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), + 'ExecuteQueryWithStreamInput': grpc.stream_unary_rpc_method_handler( + servicer.ExecuteQueryWithStreamInput, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), + 'ExecuteQueryWithStreamOutput': grpc.unary_stream_rpc_method_handler( + servicer.ExecuteQueryWithStreamOutput, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), + 'ExecuteQueryWithStreamIO': grpc.stream_stream_rpc_method_handler( + servicer.ExecuteQueryWithStreamIO, + request_deserializer=clickhouse__grpc__pb2.QueryInfo.FromString, + response_serializer=clickhouse__grpc__pb2.Result.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( - "clickhouse.grpc.ClickHouse", rpc_method_handlers - ) + 'clickhouse.grpc.ClickHouse', rpc_method_handlers) server.add_generic_rpc_handlers((generic_handler,)) -# This class is part of an EXPERIMENTAL API. + # This class is part of an EXPERIMENTAL API. 
class ClickHouse(object): """Missing associated documentation comment in .proto file.""" @staticmethod - def ExecuteQuery( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, + def ExecuteQuery(request, target, - "/clickhouse.grpc.ClickHouse/ExecuteQuery", + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/clickhouse.grpc.ClickHouse/ExecuteQuery', clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @staticmethod - def ExecuteQueryWithStreamInput( - request_iterator, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.stream_unary( - request_iterator, + def ExecuteQueryWithStreamInput(request_iterator, target, - "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput", + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.stream_unary(request_iterator, target, '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamInput', clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @staticmethod - def ExecuteQueryWithStreamOutput( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_stream( - request, + def ExecuteQueryWithStreamOutput(request, target, - "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput", + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_stream(request, target, '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamOutput', clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @staticmethod - def ExecuteQueryWithStreamIO( - request_iterator, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.stream_stream( - request_iterator, + def ExecuteQueryWithStreamIO(request_iterator, target, - "/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO", + options=(), + channel_credentials=None, + 
call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.stream_stream(request_iterator, target, '/clickhouse.grpc.ClickHouse/ExecuteQueryWithStreamIO', clickhouse__grpc__pb2.QueryInfo.SerializeToString, clickhouse__grpc__pb2.Result.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) From 5cda358e62c90a2345a60a249b6d7e8430f6454d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 06:26:04 +0100 Subject: [PATCH 083/884] Obey Python's quirky formatter --- tests/clickhouse-test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index bd796dbfdf2..dd9047c293f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -75,11 +75,13 @@ def stringhash(s): # only during process invocation https://stackoverflow.com/a/42089311 return zlib.crc32(s.encode("utf-8")) + def read_file_as_binary_string(file_path): - with open(file_path, 'rb') as file: + with open(file_path, "rb") as file: binary_data = file.read() return binary_data + # First and last lines of the log def trim_for_log(s): if not s: @@ -146,7 +148,7 @@ def clickhouse_execute_http( client.request( "POST", f"/?{base_args.client_options_query_str}{urllib.parse.urlencode(params)}", - body=body + body=body, ) res = client.getresponse() data = res.read() From 71bef27abfa9cd64a318306ddd11b21b907a37ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 06:26:54 +0100 Subject: [PATCH 084/884] Follow-up --- docker/test/base/setup_export_logs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 26fcd10d666..96a15c33674 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -194,7 +194,7 @@ function setup_logs_replication echo "Creating table system.${table}_sender" >&2 # Create Distributed table and materialized view to watch on the original table: - clickhouse-client --asterisk_include_materialized_columns 1 --query " + clickhouse-client --query " CREATE TABLE system.${table}_sender ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash}) SETTINGS flush_on_detach=0 @@ -205,7 +205,7 @@ function setup_logs_replication echo "Creating materialized view system.${table}_watcher" >&2 - clickhouse-client --asterisk_include_materialized_columns 1 --query " + clickhouse-client --query " CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, * FROM system.${table} From c5024a5f6d7f88f0fd8dc2af2c52eb1c1d57d2c2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2024 23:36:48 +0100 Subject: [PATCH 085/884] Fix typo --- docker/test/base/setup_export_logs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 96a15c33674..416281c2aa3 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -30,7 +30,7 @@ EXTRA_COLUMNS_EXPRESSION_COVERAGE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x - function __set_connection_args { - # It's impossible to use generous $CONNECTION_ARGS string, it's unsafe from 
word splitting perspective. + # It's impossible to use a generic $CONNECTION_ARGS string, it's unsafe from word splitting perspective. # That's why we must stick to the generated option CONNECTION_ARGS=( --receive_timeout=45 --send_timeout=45 --secure From 1caef191436fc05856be3b85b19cfcd97d0dc804 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jan 2024 09:44:52 +0100 Subject: [PATCH 086/884] Maybe better --- programs/main.cpp | 81 ++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/programs/main.cpp b/programs/main.cpp index 8958d84e243..1ff7e5db560 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -394,6 +394,50 @@ void checkHarmfulEnvironmentVariables(char ** argv) } #endif + +#if defined(SANITIZE_COVERAGE) +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + } bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) @@ -517,42 +561,7 @@ int main(int argc_, char ** argv_) int exit_code = main_func(static_cast(argv.size()), argv.data()); #if defined(SANITIZE_COVERAGE) - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dumpCoverage = [](const std::string & name, auto span) - { - /// Write only non-zeros. 
- std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dumpCoverage(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } + dumpCoverage(); #endif return exit_code; From 21082be9a681166b5585445c8aed62e705063081 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jan 2024 10:41:13 +0100 Subject: [PATCH 087/884] Better test --- tests/clickhouse-test | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index dd9047c293f..6d398115d43 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1269,13 +1269,17 @@ class TestCase: file_pattern = "coverage.*" matching_files = glob.glob(file_pattern) for file_path in matching_files: - body = read_file_as_binary_string(file_path) - clickhouse_execute( - args, - f"INSERT INTO system.coverage_log SELECT now(), '{self.case}', groupArray(data) FROM input('data UInt64') FORMAT RowBinary", - body=body, - retry_error_codes=True, - ) + try: + body = read_file_as_binary_string(file_path) + clickhouse_execute( + args, + f"INSERT INTO system.coverage_log SELECT now(), '{self.case}', groupArray(data) FROM input('data UInt64') FORMAT RowBinary", + body=body, + retry_error_codes=True, + ) + except Exception as e: + print("Cannot insert coverage data: ", str(e)) + # Remove the file even in case of exception to avoid accumulation and quadratic complexity. 
os.remove(file_path) coverage = clickhouse_execute( From 6b8d53a9fa54e53c766c431201ea8dfd742630ea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jan 2024 11:07:30 +0100 Subject: [PATCH 088/884] Remove obsolete comment --- tests/clickhouse-test | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 6d398115d43..02693b997b4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -412,7 +412,6 @@ def get_stacktraces_from_gdb(server_pid): # collect server stacktraces from system.stack_trace table -# it does not work in Sandbox def get_stacktraces_from_clickhouse(args): settings_str = " ".join( [ From 605c76e66ea5bdd2644026a5c7425e87f24c3702 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 16 Jan 2024 11:22:27 +0100 Subject: [PATCH 089/884] Fix test fails --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 0dc3026afc0..b235918c438 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6400,23 +6400,27 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, { String database_name = scope.context->getCurrentDatabase(); - String table_name = table_function_node->getOriginalAST()->as()->name; + String table_name = ""; - if (table_function_node->getOriginalAST()->as()->is_compound_name) + if (table_function_node->getOriginalAST() && table_function_node->getOriginalAST()->as()) { - std::vector parts; - splitInto<'.'>(parts, table_function_node->getOriginalAST()->as()->name); - - if (parts.size() == 2) + table_name = table_function_node->getOriginalAST()->as()->name; + if (table_function_node->getOriginalAST()->as()->is_compound_name) { - database_name = parts[0]; - table_name = parts[1]; + std::vector parts; + splitInto<'.'>(parts, table_function_node->getOriginalAST()->as()->name); + + if (parts.size() == 2) + { + database_name = parts[0]; + table_name = parts[1]; + } } } auto & table_function_node_typed = table_function_node->as(); - StoragePtr table = DatabaseCatalog::instance().tryGetTable({database_name, table_name}, scope.context->getQueryContext()); + StoragePtr table = table_name.empty() ? 
nullptr : DatabaseCatalog::instance().tryGetTable({database_name, table_name}, scope.context->getQueryContext()); if (table) { if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) From 8d88f4cf87d13c6760a5235abf4180102daf8b5c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 17 Jan 2024 09:42:53 +0100 Subject: [PATCH 090/884] Update setting is_parameterized_view & settings columns for view --- src/Interpreters/InterpreterCreateQuery.cpp | 5 ++++- src/Storages/StorageView.cpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7985785aa9f..6031c8b4e46 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -780,8 +780,11 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti properties.constraints = as_storage_metadata->getConstraints(); } - else if (create.select && !create.isParameterizedView()) + else if (create.select) { + if (create.isParameterizedView()) + return properties; + Block as_select_sample; if (getContext()->getSettingsRef().allow_experimental_analyzer) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 1898e49de86..6b80e2450c4 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -112,7 +112,7 @@ StorageView::StorageView( : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - if (is_parameterized_view_) + if (!is_parameterized_view_) { if (!query.isParameterizedView()) storage_metadata.setColumns(columns_); From d3b4dea8058e1cccb34bf39b3f26b4c0e5b2368a Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 17 Jan 2024 20:02:17 +0100 Subject: [PATCH 091/884] Fix clang tidy build --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index d2270ea9910..7322d53d831 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6401,7 +6401,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, { String database_name = scope.context->getCurrentDatabase(); - String table_name = ""; + String table_name; if (table_function_node->getOriginalAST() && table_function_node->getOriginalAST()->as()) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6031c8b4e46..e71946caafe 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -809,11 +809,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti * for example: LIMIT, OFFSET, functions parameters, functions constant only arguments. 
*/ - SelectQueryOptions options; - if (create.isParameterizedView()) - options = options.createParameterizedView(); - - InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), options); + InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), SelectQueryOptions()); as_select_sample = interpreter.getSampleBlock(); } From e3f5058f6129badab2e0071e86f51ffb77e57ce5 Mon Sep 17 00:00:00 2001 From: MyroTk <44327070+MyroTk@users.noreply.github.com> Date: Wed, 17 Jan 2024 12:13:15 -0800 Subject: [PATCH 092/884] Update Dockerfile --- docker/test/integration/runner/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index c795fbf0672..2a81db78a3d 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -94,6 +94,7 @@ RUN python3 -m pip install --no-cache-dir \ pytest-repeat \ pytest-timeout \ pytest-xdist \ + pytest-reportlog==0.4.0 \ pytz \ pyyaml==5.3.1 \ redis \ From f89803ebf65d7590e73816052b7ac2de81e04864 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jan 2024 23:17:53 +0100 Subject: [PATCH 093/884] Slightly better --- docker/test/base/setup_export_logs.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 416281c2aa3..043adf99ffc 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -156,7 +156,8 @@ function setup_logs_replication # Do not try to resolve stack traces in case of debug/sanitizers # build, since it is too slow (flushing of trace_log can take ~1min # with such MV attached) - if [[ "$debug_or_sanitizer_build" = 1 ]]; then + if [[ "$debug_or_sanitizer_build" = 1 ]] + then EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION}" else EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_TRACE_LOG}" @@ -180,7 +181,7 @@ function setup_logs_replication # Create the destination table with adapted name and structure: statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' s/^\($/('"$EXTRA_COLUMNS_FOR_TABLE"'/; - s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/; + s/ORDER BY \(?(.+?)\)?/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"', \1)'/; s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; /^TTL /d ') From f9ca4e3b8541d7db85effa3f9be286f7ad916965 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jan 2024 00:52:05 +0100 Subject: [PATCH 094/884] Slightly better --- docker/test/base/setup_export_logs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 043adf99ffc..7033d4b52e2 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -181,7 +181,7 @@ function setup_logs_replication # Create the destination table with adapted name and structure: statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' s/^\($/('"$EXTRA_COLUMNS_FOR_TABLE"'/; - s/ORDER BY \(?(.+?)\)?/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"', \1)'/; + s/ORDER BY \(?(.+?)\)?/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"', \1)/; s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; /^TTL /d ') From 61101d1a577b441931ef74b24d449b085d0f0ec3 Mon Sep 17 00:00:00 2001 From: Alexey 
Milovidov Date: Thu, 18 Jan 2024 01:16:50 +0100 Subject: [PATCH 095/884] Add a release build with coverage, turn off coverage in the debug build --- .github/workflows/master.yml | 8 ++++++++ .github/workflows/pull_request.yml | 8 ++++++++ tests/ci/ci_config.py | 17 +++++++++++++---- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index d2865eb737d..50d3eb4a062 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -98,6 +98,14 @@ jobs: build_name: package_release checkout_depth: 0 data: ${{ needs.RunConfig.outputs.data }} + BuilderDebReleaseCoverage: + needs: [ RunConfig, BuildDockers ] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_build.yml + with: + build_name: package_release_coverage + checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAarch64: needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() }} diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index bd2b2b60904..7f843f82c01 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -146,6 +146,14 @@ jobs: build_name: package_release checkout_depth: 0 data: ${{ needs.RunConfig.outputs.data }} + BuilderDebReleaseCoverage: + needs: [ RunConfig, FastTest ] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_build.yml + with: + build_name: package_release_coverage + checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAarch64: needs: [RunConfig, FastTest] if: ${{ !failure() && !cancelled() }} diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b8dff3f0a28..1ca4e06bc8c 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -474,6 +474,12 @@ CI_CONFIG = CiConfig( name="package_debug", compiler="clang-17", debug_build=True, + package_type="deb", + sparse_checkout=True, + ), + "package_release_coverage": BuildConfig( + name="package_release_coverage", + compiler="clang-17", coverage=True, package_type="deb", sparse_checkout=True, @@ -571,6 +577,7 @@ CI_CONFIG = CiConfig( "package_tsan", "package_msan", "package_debug", + "package_release_coverage", "binary_release", "fuzzers", ] @@ -660,16 +667,15 @@ CI_CONFIG = CiConfig( "Stateful tests (release)": TestConfig( "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore ), + "Stateful tests (coverage)": TestConfig( + "package_release_coverage", job_config=JobConfig(**stateful_test_common_params) # type: ignore + ), "Stateful tests (aarch64)": TestConfig( "package_aarch64", job_config=JobConfig(**stateful_test_common_params) # type: ignore ), "Stateful tests (release, DatabaseOrdinary)": TestConfig( "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - # "Stateful tests (release, DatabaseReplicated)": TestConfig( - # "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore - # ), - # Stateful tests for parallel replicas "Stateful tests (release, ParallelReplicas)": TestConfig( "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore ), @@ -712,6 +718,9 @@ CI_CONFIG = CiConfig( "Stateless tests (release)": TestConfig( "package_release", job_config=JobConfig(**statless_test_common_params) # type: ignore ), + "Stateless tests (coverage)": TestConfig( + "package_release_coverage", job_config=JobConfig(**statless_test_common_params) # type: ignore + ), "Stateless tests 
(aarch64)": TestConfig( "package_aarch64", job_config=JobConfig(**statless_test_common_params) # type: ignore ), From b9f8fff623448e7013bbe604b39d0f72b81032f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jan 2024 01:36:48 +0100 Subject: [PATCH 096/884] Fix YAML --- .github/workflows/master.yml | 2 +- .github/workflows/pull_request.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 50d3eb4a062..1920f3a2a56 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -99,7 +99,7 @@ jobs: checkout_depth: 0 data: ${{ needs.RunConfig.outputs.data }} BuilderDebReleaseCoverage: - needs: [ RunConfig, BuildDockers ] + needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7f843f82c01..57199e6b9d9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -147,7 +147,7 @@ jobs: checkout_depth: 0 data: ${{ needs.RunConfig.outputs.data }} BuilderDebReleaseCoverage: - needs: [ RunConfig, FastTest ] + needs: [RunConfig, FastTest] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: From cc5cc361ef561993bc7bbea6f1588562f7d3deae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jan 2024 12:35:49 +0100 Subject: [PATCH 097/884] Fix error --- docker/packager/packager | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index c310185b071..8efd3b8f302 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -289,7 +289,7 @@ def parse_env_variables( result.append("BUILD_TYPE=None") if coverage: - cmake_flags.append("-DSANITIZE_COVERAGE=1") + cmake_flags.append("-DSANITIZE_COVERAGE=1 -DBUILD_STANDALONE_KEEPER=0") if not cache: cmake_flags.append("-DCOMPILER_CACHE=disabled") From 6c0445f36584a60724f7d616f47c7b953621997c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jan 2024 12:47:18 +0100 Subject: [PATCH 098/884] Fix CMake --- cmake/sanitize.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 3882b51227e..23e9cc34fec 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -63,14 +63,14 @@ endif() option(WITH_COVERAGE "Instrumentation for code coverage with default implementation" OFF) if (WITH_COVERAGE) - message (INFORMATION "Enabled instrumentation for code coverage") + message (STATUS "Enabled instrumentation for code coverage") set(COVERAGE_FLAGS "-fprofile-instr-generate -fcoverage-mapping") endif() option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF) if (SANITIZE_COVERAGE) - message (INFORMATION "Enabled instrumentation for code coverage") + message (STATUS "Enabled instrumentation for code coverage") # We set this define for whole build to indicate that at least some parts are compiled with coverage. # And to expose it in system.build_options. 
From 6d6b8fcf8e988d78fc983ed4043ed556e36b833b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jan 2024 12:58:50 +0100 Subject: [PATCH 099/884] Add missing comments in code --- tests/ci/ci_config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 1ca4e06bc8c..45bdfbecb0c 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -55,6 +55,13 @@ class JobConfig: run_always: bool = False +# About the "sparse_checkout" option: +# +# Misha f. Shiryaev +# :facepalm: +# we have this feature, it's used by devs, we need to test it in CI +# It's not useful for the CI itself + @dataclass class BuildConfig: name: str From db3ffa5c86dba79ca7052abe8d53799ac3e4afb9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 18 Jan 2024 12:11:03 +0000 Subject: [PATCH 100/884] Automatic style fix --- tests/ci/ci_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 45bdfbecb0c..ab37659e65b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -62,6 +62,7 @@ class JobConfig: # we have this feature, it's used by devs, we need to test it in CI # It's not useful for the CI itself + @dataclass class BuildConfig: name: str From f1749217ee41b3b721fb8a185a929eb18db89b2f Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 18 Jan 2024 21:53:56 +0200 Subject: [PATCH 101/884] added format_schema_rows_template setting --- docs/en/operations/settings/settings-formats.md | 4 ++++ src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + 4 files changed, 7 insertions(+) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index eb09af44efd..5dedaa2f6ab 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1668,6 +1668,10 @@ Path to file which contains format string for rows (for Template format). Delimiter between rows (for Template format). 
+### format_schema_rows_template {#format_schema_rows_template} + +Format string for rows (for Template format) + ## CustomSeparated format settings {custom-separated-format-settings} ### format_custom_escaping_rule {#format_custom_escaping_rule} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 292e945a29c..4de739ec405 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1079,6 +1079,7 @@ class IColumn; M(String, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \ M(String, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \ M(String, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \ + M(String, format_schema_rows_template, "\n", "Format string for rows (for Template format)", 0) \ \ M(EscapingRule, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \ M(String, format_custom_field_delimiter, "\t", "Delimiter between fields (for CustomSeparated format)", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 608f9433d6f..6f414c5a69f 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -166,6 +166,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.template_settings.row_format = settings.format_template_row; + format_settings.template_settings.row_format_schema_string = settings.format_schema_rows_template; format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line; format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.tsv.enum_as_number = settings.input_format_tsv_enum_as_number; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 30e4dd04513..70d33a1edcd 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -338,6 +338,7 @@ struct FormatSettings String resultset_format; String row_format; String row_between_delimiter; + String row_format_schema_string; } template_settings; struct From c966674c242552584540dc2e28026894c39f9b16 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jan 2024 03:59:05 +0100 Subject: [PATCH 102/884] Disable LTO with Coverage --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 063cfc77302..6e984ddd864 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -348,7 +348,7 @@ if (COMPILER_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") - if (NOT ENABLE_TESTS AND NOT SANITIZE AND OS_LINUX) + if (NOT ENABLE_TESTS AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX) # https://clang.llvm.org/docs/ThinLTO.html # Applies to clang and linux only. # Disabled when building with tests or sanitizers. 
From c6afbe522cae20ee6041534bf7ee7e31e3acb51c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jan 2024 05:26:14 +0100 Subject: [PATCH 103/884] Do not check for large translation units with coverage --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e984ddd864..d0f44f6f3ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -546,7 +546,7 @@ if (ENABLE_RUST) endif() endif() -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON) else () set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF) From 482229cd27c7ddf4218af2ea5d9b087e51876ab0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jan 2024 08:04:34 +0100 Subject: [PATCH 104/884] Add tests with coverage --- .github/workflows/master.yml | 16 ++++++++++++++++ .github/workflows/pull_request.yml | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 1920f3a2a56..5f683fa6c59 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -371,6 +371,14 @@ jobs: test_name: Stateless tests (release) runner_type: func-tester data: ${{ needs.RunConfig.outputs.data }} + FunctionalStatelessTestCoverage: + needs: [RunConfig, BuilderDebReleaseCoverage] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Stateless tests (coverage) + runner_type: func-tester + data: ${{ needs.RunConfig.outputs.data }} FunctionalStatelessTestReleaseDatabaseOrdinary: needs: [RunConfig, BuilderDebRelease] if: ${{ !failure() && !cancelled() }} @@ -463,6 +471,14 @@ jobs: test_name: Stateful tests (release) runner_type: func-tester data: ${{ needs.RunConfig.outputs.data }} + FunctionalStatefulTestCoverage: + needs: [RunConfig, BuilderDebReleaseCoverage] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Stateful tests (coverage) + runner_type: func-tester + data: ${{ needs.RunConfig.outputs.data }} FunctionalStatefulTestAarch64: needs: [RunConfig, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 28617695ad5..235c8042657 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -382,6 +382,14 @@ jobs: test_name: Stateless tests (release) runner_type: func-tester data: ${{ needs.RunConfig.outputs.data }} + FunctionalStatelessTestCoverage: + needs: [RunConfig, BuilderDebReleaseCoverage] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Stateless tests (coverage) + runner_type: func-tester + data: ${{ needs.RunConfig.outputs.data }} FunctionalStatelessTestReleaseDatabaseReplicated: needs: [RunConfig, BuilderDebRelease] if: ${{ !failure() && !cancelled() }} @@ -509,6 +517,14 @@ jobs: test_name: Stateful tests (release) runner_type: func-tester data: ${{ needs.RunConfig.outputs.data }} + FunctionalStatefulTestCoverage: + needs: [RunConfig, BuilderDebReleaseCoverage] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Stateful tests (coverage) + runner_type: func-tester + data: ${{ needs.RunConfig.outputs.data }} 
FunctionalStatefulTestAarch64: needs: [RunConfig, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} From 05609cf75d5048fbd62508fcf6454cec1855943d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jan 2024 17:02:17 +0100 Subject: [PATCH 105/884] Ci to CI --- tests/ci/ci_config.py | 6 +++--- tests/ci/test_ci_config.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 1d94f4fc1cc..611767be2e4 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -270,9 +270,9 @@ sql_test_params = { @dataclass -class CiConfig: +class CIConfig: """ - Contains configs for ALL jobs in CI pipeline + Contains configs for all jobs in the CI pipeline each config item in the below dicts should be an instance of JobConfig class or inherited from it """ @@ -435,7 +435,7 @@ class CiConfig: raise KeyError("config contains errors", errors) -CI_CONFIG = CiConfig( +CI_CONFIG = CIConfig( label_configs={ Labels.DO_NOT_TEST_LABEL.value: LabelConfig(run_jobs=["Style check"]), }, diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index d22ed16748e..49d49d9c328 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -3,7 +3,7 @@ import unittest -class TestCiConfig(unittest.TestCase): +class TestCIConfig(unittest.TestCase): def test_no_errors_in_ci_config(self): raised = None try: From 639d7745d450073234405d0725cbd64884d4f8c5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jan 2024 17:02:23 +0100 Subject: [PATCH 106/884] Fix error --- docker/test/base/setup_export_logs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 7033d4b52e2..d3721108426 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -181,7 +181,7 @@ function setup_logs_replication # Create the destination table with adapted name and structure: statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' s/^\($/('"$EXTRA_COLUMNS_FOR_TABLE"'/; - s/ORDER BY \(?(.+?)\)?/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"', \1)/; + s/^ORDER BY \(?(.+?)\)?$/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"', \1)/; s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; /^TTL /d ') From 8c54a09e6652b491764abeddf3a0e8e6800374ef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jan 2024 08:59:29 +0100 Subject: [PATCH 107/884] Fix error --- docker/test/base/setup_export_logs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index d3721108426..156adb1d1e4 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -17,7 +17,7 @@ CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export} EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime('UTC'), check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, INDEX ix_pr (pull_request_number) TYPE set(100), INDEX ix_commit (commit_sha) TYPE set(100), INDEX ix_check_time (check_start_time) TYPE minmax, "} EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"CAST(0 AS UInt32) AS pull_request_number, '' AS commit_sha, now() AS check_start_time, toLowCardinality('') AS check_name, toLowCardinality('') AS instance_type, '' AS instance_id"} 
-EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "} +EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name"} # trace_log needs more columns for symbolization EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), " @@ -181,7 +181,7 @@ function setup_logs_replication # Create the destination table with adapted name and structure: statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' s/^\($/('"$EXTRA_COLUMNS_FOR_TABLE"'/; - s/^ORDER BY \(?(.+?)\)?$/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"', \1)/; + s/^ORDER BY (([^\(].+?)|\((.+?)\))$/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"', \2\3)/; s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; /^TTL /d ') From cfe60586c007a230df68771b3f914d9a66414b7d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jan 2024 21:45:11 +0100 Subject: [PATCH 108/884] Reset coverage after each test --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index b62bd5975ea..49c517852a6 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2894,7 +2894,7 @@ def parse_args(): parser.add_argument( "--reset-coverage-before-every-test", action="store_true", - default=False, + default=True, help="Collect isolated test coverage for every test instead of a cumulative. Useful only when tests are run sequentially.", ) parser.add_argument( From 51cc01f8be8fea1fcaea0af9c85ca2930536e593 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jan 2024 14:36:03 +0100 Subject: [PATCH 109/884] Minor change --- base/base/coverage.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 499e384d21f..05bef21049b 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -62,6 +62,7 @@ namespace uintptr_t * allocate(size_t size) { + /// Note: mmap return zero-initialized memory, and we count on that. void * map = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == map) return nullptr; @@ -91,8 +92,6 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop) /// Note: we will leak this. current_coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); cumulative_coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); - - resetCoverage(); } /// This is called at least once for every DSO for initialization From b967cc6af9deac20eff318e3433fc5b09fd6314a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jan 2024 15:30:50 +0100 Subject: [PATCH 110/884] Fix error --- base/base/coverage.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 05bef21049b..b85f1a16d32 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -92,6 +92,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop) /// Note: we will leak this. 
current_coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); cumulative_coverage_array = allocate(sizeof(uintptr_t) * coverage_array_size); + + resetCoverage(); } /// This is called at least once for every DSO for initialization @@ -102,8 +104,8 @@ void __sanitizer_cov_pcs_init(const uintptr_t * pcs_begin, const uintptr_t * pcs return; pc_table_initialized = true; - all_addresses_array = allocate(sizeof(uintptr_t) * coverage_array_size); all_addresses_array_size = pcs_end - pcs_begin; + all_addresses_array = allocate(sizeof(uintptr_t) * all_addresses_array_size); /// They are not a real pointers, but also contain a flag in the most significant bit, /// in which we are not interested for now. Reset it. @@ -125,10 +127,10 @@ void __sanitizer_cov_trace_pc_guard(uint32_t * guard) /// The values of `*guard` are as you set them in /// __sanitizer_cov_trace_pc_guard_init and so you can make them consecutive /// and use them to dereference an array or a bit vector. - void * pc = __builtin_return_address(0); + intptr_t pc = reinterpret_cast(__builtin_return_address(0)); - current_coverage_array[guard - guards_start] = reinterpret_cast(pc); - cumulative_coverage_array[guard - guards_start] = reinterpret_cast(pc); + current_coverage_array[guard - guards_start] = pc; + cumulative_coverage_array[guard - guards_start] = pc; } } From 799b8d6356e68c4544791f42a72d71bed38322c5 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 21 Jan 2024 19:00:40 +0000 Subject: [PATCH 111/884] support resource request canceling --- docs/en/operations/system-tables/scheduler.md | 4 + src/Common/Scheduler/ISchedulerNode.h | 2 + src/Common/Scheduler/ISchedulerQueue.h | 6 ++ src/Common/Scheduler/Nodes/FairPolicy.h | 98 ++++++++++--------- src/Common/Scheduler/Nodes/FifoQueue.h | 23 ++++- src/Common/Scheduler/Nodes/PriorityPolicy.h | 37 ++++--- .../gtest_resource_manager_hierarchical.cpp | 1 - .../Nodes/tests/gtest_resource_scheduler.cpp | 63 ++++++++++++ src/Common/Scheduler/ResourceGuard.h | 9 +- src/Common/Scheduler/ResourceRequest.cpp | 13 +++ src/Common/Scheduler/ResourceRequest.h | 30 +++--- src/Common/Scheduler/SchedulerRoot.h | 32 +++--- .../System/StorageSystemScheduler.cpp | 4 + 13 files changed, 218 insertions(+), 104 deletions(-) create mode 100644 src/Common/Scheduler/ResourceRequest.cpp diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md index 953db4c28f2..c4de7f76fdc 100644 --- a/docs/en/operations/system-tables/scheduler.md +++ b/docs/en/operations/system-tables/scheduler.md @@ -26,7 +26,9 @@ priority: 0 is_active: 0 active_children: 0 dequeued_requests: 67 +canceled_requests: 0 dequeued_cost: 4692272 +canceled_cost: 0 busy_periods: 63 vruntime: 938454.1999999989 system_vruntime: ᴺᵁᴸᴸ @@ -54,7 +56,9 @@ Columns: - `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied. - `active_children` (`UInt64`) - The number of children in active state. - `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node. +- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node. - `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node. +- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node. - `busy_periods` (`UInt64`) - The total number of deactivations of this node. 
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner. - `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`. diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 804026d7bf4..20c1f4332da 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -387,7 +387,9 @@ public: /// Introspection std::atomic dequeued_requests{0}; + std::atomic canceled_requests{0}; std::atomic dequeued_cost{0}; + std::atomic canceled_cost{0}; std::atomic busy_periods{0}; }; diff --git a/src/Common/Scheduler/ISchedulerQueue.h b/src/Common/Scheduler/ISchedulerQueue.h index cbe63bd304a..532f4bf6c63 100644 --- a/src/Common/Scheduler/ISchedulerQueue.h +++ b/src/Common/Scheduler/ISchedulerQueue.h @@ -50,6 +50,12 @@ public: /// Should be called outside of scheduling subsystem, implementation must be thread-safe. virtual void enqueueRequest(ResourceRequest * request) = 0; + /// Cancel previously enqueued request. + /// Returns `false` and does nothing given unknown or already executed request. + /// Returns `true` if requests has been found and canceled. + /// Should be called outside of scheduling subsystem, implementation must be thread-safe. + virtual bool cancelRequest(ResourceRequest * request) = 0; + /// For introspection ResourceCost getBudget() const { diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index c0e187e6fa9..53740e7a543 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -134,56 +134,64 @@ public: std::pair dequeueRequest() override { - if (heap_size == 0) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - std::pop_heap(items.begin(), items.begin() + heap_size); - Item & current = items[heap_size - 1]; - - // SFQ fairness invariant: system vruntime equals last served request start-time - assert(current.vruntime >= system_vruntime); - system_vruntime = current.vruntime; - - // By definition vruntime is amount of consumed resource (cost) divided by weight - current.vruntime += double(request->cost) / current.child->info.weight; - max_vruntime = std::max(max_vruntime, current.vruntime); - - if (child_active) // Put active child back in heap after vruntime update + while (true) { - std::push_heap(items.begin(), items.begin() + heap_size); - } - else // Deactivate child if it is empty, but remember it's vruntime for latter activations - { - heap_size--; + if (heap_size == 0) + return {nullptr, false}; - // Store index of this inactive child in `parent.idx` - // This enables O(1) search of inactive children instead of O(n) - current.child->info.parent.idx = heap_size; - } + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + std::pop_heap(items.begin(), items.begin() + heap_size); + Item & current = items[heap_size - 1]; - // Reset any difference between children on busy period end - if (heap_size == 0) - { - // Reset vtime to zero to avoid floating-point error accumulation, - // but do not reset too often, because it's O(N) - UInt64 ns = clock_gettime_ns(); - if (last_reset_ns + 
1000000000 < ns) + if (request) { - last_reset_ns = ns; - for (Item & item : items) - item.vruntime = 0; - max_vruntime = 0; - } - system_vruntime = max_vruntime; - busy_periods++; - } + // SFQ fairness invariant: system vruntime equals last served request start-time + assert(current.vruntime >= system_vruntime); + system_vruntime = current.vruntime; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, heap_size > 0}; + // By definition vruntime is amount of consumed resource (cost) divided by weight + current.vruntime += double(request->cost) / current.child->info.weight; + max_vruntime = std::max(max_vruntime, current.vruntime); + } + + if (child_active) // Put active child back in heap after vruntime update + { + std::push_heap(items.begin(), items.begin() + heap_size); + } + else // Deactivate child if it is empty, but remember it's vruntime for latter activations + { + heap_size--; + + // Store index of this inactive child in `parent.idx` + // This enables O(1) search of inactive children instead of O(n) + current.child->info.parent.idx = heap_size; + } + + // Reset any difference between children on busy period end + if (heap_size == 0) + { + // Reset vtime to zero to avoid floating-point error accumulation, + // but do not reset too often, because it's O(N) + UInt64 ns = clock_gettime_ns(); + if (last_reset_ns + 1000000000 < ns) + { + last_reset_ns = ns; + for (Item & item : items) + item.vruntime = 0; + max_vruntime = 0; + } + system_vruntime = max_vruntime; + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, heap_size > 0}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 38ae902bc2f..2adb7241314 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -40,7 +40,6 @@ public: void enqueueRequest(ResourceRequest * request) override { std::unique_lock lock(mutex); - request->enqueue_ns = clock_gettime_ns(); queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); @@ -63,6 +62,26 @@ public: return {result, !requests.empty()}; } + bool cancelRequest(ResourceRequest * request) override + { + std::unique_lock lock(mutex); + // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) + for (auto i = requests.begin(), e = requests.end(); i != e; ++i) + { + if (*i == request) + { + requests.erase(i); + if (requests.empty()) + busy_periods++; + queue_cost -= request->cost; + canceled_requests++; + canceled_cost += request->cost; + return true; + } + } + return false; + } + bool isActive() override { std::unique_lock lock(mutex); @@ -105,7 +124,7 @@ public: private: std::mutex mutex; Int64 queue_cost = 0; - std::deque requests; + std::deque requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel }; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index 6d6b15bd063..fd02ea3df62 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -102,25 +102,30 @@ public: std::pair dequeueRequest() override { - if (items.empty()) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - - // Deactivate child if it is empty - if 
(!child_active) + while (true) { - std::pop_heap(items.begin(), items.end()); - items.pop_back(); if (items.empty()) - busy_periods++; - } + return {nullptr, false}; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, !items.empty()}; + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + + // Deactivate child if it is empty + if (!child_active) + { + std::pop_heap(items.begin(), items.end()); + items.pop_back(); + if (items.empty()) + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, !items.empty()}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp index 961a3b6f713..cdf09776077 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp @@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke) { ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking); gA.lock(); - gA.setFailure(); gA.unlock(); ResourceGuard gB(cB->get("res1")); diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index 9fefbc02cbd..e76639a4b01 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -4,6 +4,7 @@ #include +#include #include using namespace DB; @@ -73,6 +74,22 @@ struct ResourceHolder } }; +struct MyRequest : public ResourceRequest +{ + std::function on_execute; + + explicit MyRequest(ResourceCost cost_, std::function on_execute_) + : ResourceRequest(cost_) + , on_execute(on_execute_) + {} + + void execute() override + { + if (on_execute) + on_execute(); + } +}; + TEST(SchedulerRoot, Smoke) { ResourceTest t; @@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke) EXPECT_TRUE(fc2->requests.contains(&rg.request)); } } + +TEST(SchedulerRoot, Cancel) +{ + ResourceTest t; + + ResourceHolder r1(t); + auto * fc1 = r1.add("/", "1"); + r1.add("/prio"); + auto a = r1.addQueue("/prio/A", "1"); + auto b = r1.addQueue("/prio/B", "2"); + r1.registerResource(); + + std::barrier sync(2); + std::thread consumer1([&] + { + std::barrier destruct_sync(2); + MyRequest request(1,[&] + { + sync.arrive_and_wait(); // (A) + EXPECT_TRUE(fc1->requests.contains(&request)); + sync.arrive_and_wait(); // (B) + request.finish(); + destruct_sync.arrive_and_wait(); // (C) + }); + a.queue->enqueueRequest(&request); + destruct_sync.arrive_and_wait(); // (C) + }); + + std::thread consumer2([&] + { + MyRequest request(1,[&] + { + FAIL() << "This request must be canceled, but instead executes"; + }); + sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately + b.queue->enqueueRequest(&request); + bool canceled = b.queue->cancelRequest(&request); + EXPECT_TRUE(canceled); + sync.arrive_and_wait(); // (B) release request of consumer1 to be finished + }); + + consumer1.join(); + consumer2.join(); + + EXPECT_TRUE(fc1->requests.empty()); +} diff --git a/src/Common/Scheduler/ResourceGuard.h b/src/Common/Scheduler/ResourceGuard.h index dca4041b176..50f665a384b 100644 --- a/src/Common/Scheduler/ResourceGuard.h +++ b/src/Common/Scheduler/ResourceGuard.h @@ -71,8 +71,7 
@@ public: // lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread chassert(state == Dequeued); state = Finished; - if (constraint) - constraint->finishRequest(this); + ResourceRequest::finish(); } static Request & local() @@ -126,12 +125,6 @@ public: } } - /// Mark request as unsuccessful; by default request is considered to be successful - void setFailure() - { - request.successful = false; - } - ResourceLink link; Request & request; }; diff --git a/src/Common/Scheduler/ResourceRequest.cpp b/src/Common/Scheduler/ResourceRequest.cpp new file mode 100644 index 00000000000..26e8084cdfa --- /dev/null +++ b/src/Common/Scheduler/ResourceRequest.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +void ResourceRequest::finish() +{ + if (constraint) + constraint->finishRequest(this); +} + +} diff --git a/src/Common/Scheduler/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h index 3d2230746f9..f3153ad382c 100644 --- a/src/Common/Scheduler/ResourceRequest.h +++ b/src/Common/Scheduler/ResourceRequest.h @@ -14,9 +14,6 @@ class ISchedulerConstraint; using ResourceCost = Int64; constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); -/// Timestamps (nanoseconds since epoch) -using ResourceNs = UInt64; - /* * Request for a resource consumption. The main moving part of the scheduling subsystem. * Resource requests processing workflow: @@ -31,7 +28,7 @@ using ResourceNs = UInt64; * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. * 4) Callback ResourceRequest::execute() is called to provide access to the resource. * 5) The resource consumption is happening outside of the scheduling subsystem. - * 6) request->constraint->finishRequest() is called when consumption is finished. + * 6) ResourceRequest::finish() is called when consumption is finished. * * Steps (5) and (6) can be omitted if constraint is not used by the resource. * @@ -39,7 +36,10 @@ using ResourceNs = UInt64; * Request ownership is done outside of the scheduling subsystem. * After (6) request can be destructed safely. * - * Request cancelling is not supported yet. + * Request can also be canceled before (3) using ISchedulerQueue::cancelRequest(). + * Returning false means it is too late for request to be canceled. It should be processed in a regular way. + * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen + * and step (6) MUST be omitted. */ class ResourceRequest { @@ -48,32 +48,20 @@ public: /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; - /// Request outcome - /// Should be filled during resource consumption - bool successful; - /// Scheduler node to be notified on consumption finish /// Auto-filled during request enqueue/dequeue ISchedulerConstraint * constraint; - /// Timestamps for introspection - ResourceNs enqueue_ns; - ResourceNs execute_ns; - ResourceNs finish_ns; - explicit ResourceRequest(ResourceCost cost_ = 1) { reset(cost_); } + /// ResourceRequest object may be reused again after reset() void reset(ResourceCost cost_) { cost = cost_; - successful = true; constraint = nullptr; - enqueue_ns = 0; - execute_ns = 0; - finish_ns = 0; } virtual ~ResourceRequest() = default; @@ -83,6 +71,12 @@ public: /// just triggering start of a consumption, not doing the consumption itself /// (e.g. 
setting an std::promise or creating a job in a thread pool) virtual void execute() = 0; + + /// Stop resource consumption and notify resource scheduler. + /// Should be called when resource consumption is finished by consumer. + /// ResourceRequest should not be destructed or reset before calling to `finish()`. + /// WARNING: this function MUST not be called if request was canceled. + void finish(); }; } diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index 3a23a8df834..ab3f702a422 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -145,22 +145,27 @@ public: std::pair dequeueRequest() override { - if (current == nullptr) // No active resources - return {nullptr, false}; + while (true) + { + if (current == nullptr) // No active resources + return {nullptr, false}; - // Dequeue request from current resource - auto [request, resource_active] = current->root->dequeueRequest(); - assert(request != nullptr); + // Dequeue request from current resource + auto [request, resource_active] = current->root->dequeueRequest(); - // Deactivate resource if required - if (!resource_active) - deactivate(current); - else - current = current->next; // Just move round-robin pointer + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer - dequeued_requests++; - dequeued_cost += request->cost; - return {request, current != nullptr}; + if (request == nullptr) // Possible in case of request cancel, just retry + continue; + + dequeued_requests++; + dequeued_cost += request->cost; + return {request, current != nullptr}; + } } bool isActive() override @@ -245,7 +250,6 @@ private: void execute(ResourceRequest * request) { - request->execute_ns = clock_gettime_ns(); request->execute(); } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index ba07d44dbf9..633bac5d285 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -30,7 +30,9 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, {"active_children", std::make_shared(), "The number of children in active state."}, {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, + {"canceled_requests", std::make_shared(), "The total number of resource requests canceled from this node."}, {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"canceled_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests canceled from this node."}, {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, {"vruntime", std::make_shared(std::make_shared()), "For children of `fair` nodes only. 
Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, @@ -93,7 +95,9 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(node->isActive()); res_columns[i++]->insert(node->activeChildren()); res_columns[i++]->insert(node->dequeued_requests.load()); + res_columns[i++]->insert(node->canceled_requests.load()); res_columns[i++]->insert(node->dequeued_cost.load()); + res_columns[i++]->insert(node->canceled_cost.load()); res_columns[i++]->insert(node->busy_periods.load()); Field vruntime; From eae39ff545978386a8a57bca7c68b1ff97cf6d6d Mon Sep 17 00:00:00 2001 From: Blargian Date: Sun, 21 Jan 2024 21:51:06 +0200 Subject: [PATCH 112/884] #31363 - modified TemplateBlockOutputFormat to work with added format_schema_rows_template setting --- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 3 +- src/Formats/FormatSettings.h | 2 +- .../Impl/TemplateBlockOutputFormat.cpp | 33 +++++++++++++++---- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4de739ec405..3143ada7d65 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1078,8 +1078,8 @@ class IColumn; M(String, format_schema, "", "Schema identifier (used by schema-based formats)", 0) \ M(String, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \ M(String, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \ - M(String, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \ M(String, format_schema_rows_template, "\n", "Format string for rows (for Template format)", 0) \ + M(String, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \ \ M(EscapingRule, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \ M(String, format_custom_field_delimiter, "\t", "Delimiter between fields (for CustomSeparated format)", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6f414c5a69f..6f7f758621c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -166,7 +166,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.template_settings.row_format = settings.format_template_row; - format_settings.template_settings.row_format_schema_string = settings.format_schema_rows_template; + format_settings.template_settings.row_format_schema = settings.format_schema_rows_template; + format_settings.template_settings.row_between_delimiter_schema = settings.format_schema_rows_between_delimiter; format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line; format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.tsv.enum_as_number = settings.input_format_tsv_enum_as_number; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 70d33a1edcd..28a2076af84 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -338,7 +338,7 @@ struct FormatSettings String resultset_format; String row_format; String row_between_delimiter; - String 
row_format_schema_string; + String row_format_schema; } template_settings; struct diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 6d8fe1e5a2c..495cc0e541e 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int SYNTAX_ERROR; + extern const int INVALID_TEMPLATE_FORMAT; } TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_, @@ -213,14 +214,34 @@ void registerOutputFormatTemplate(FormatFactory & factory) }); } - ParsedTemplateFormatString row_format = ParsedTemplateFormatString( + ParsedTemplateFormatString row_format; + auto idx_by_name = [&](const String & colName) + { + return sample.getPositionByName(colName); + }; + if (settings.template_settings.row_format.empty()) + { + if (settings.template_settings.row_format_schema.empty()) + { + throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template"); + } + else + { + row_format = ParsedTemplateFormatString(); + row_format.parse(settings.template_settings.row_format_schema,idx_by_name); + } + } + else + { + if (settings.template_settings.row_format_schema.empty()) + { + throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template"); + } + row_format = ParsedTemplateFormatString( FormatSchemaInfo(settings.template_settings.row_format, "Template", false, settings.schema.is_server, settings.schema.format_schema_path), - [&](const String & colName) - { - return sample.getPositionByName(colName); - }); - + idx_by_name); + } return std::make_shared(sample, buf, settings, resultset_format, row_format, settings.template_settings.row_between_delimiter); }); From d2c671c17eb4a85583b30d81033f7180ea93f627 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 19 Jan 2024 20:38:08 +0000 Subject: [PATCH 113/884] 1st attempt at vectorization 80 mio arrays with 150 elements each, runtimes in sec WITH (SELECT vec FROM vectors limit 1) AS const_vec SELECT sum(dist) FROM (SELECT (const_vec, vec) AS dist FROM vectors) auto-vectorized hand-vectorized L2 Float32 0.61 0.57 L2 Float64 1.15 0.99 cos Float32 0.78 0.65 cos Float64 1.35 1.05 --- src/Functions/array/arrayDistance.cpp | 145 +++++++++++++++++- .../02282_array_distance.reference | 4 + .../0_stateless/02282_array_distance.sql | 40 +++-- 3 files changed, 172 insertions(+), 17 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index c68c89ee0d5..670442c0c79 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -9,6 +10,10 @@ #include #include +#if USE_MULTITARGET_CODE +#include +#endif + namespace DB { namespace ErrorCodes @@ -75,6 +80,49 @@ struct L2Distance state.sum += other_state.sum; } +#if USE_MULTITARGET_CODE + template + AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine( + const ResultType * __restrict data_x, + const ResultType * __restrict data_y, + size_t i_max, + size_t & i_x, + size_t & i_y, + State & state) + { + __m512 sums; + if constexpr (std::is_same_v) + sums = _mm512_setzero_ps(); + else + sums = _mm512_setzero_pd(); + + const size_t n = (std::is_same_v) ? 
16 : 8; + + for (; i_x + n < i_max; i_x += n, i_y += n) + { + if constexpr (std::is_same_v) + { + __m512 x = _mm512_loadu_ps(data_x + i_x); + __m512 y = _mm512_loadu_ps(data_y + i_y); + __m512 differences = _mm512_sub_ps(x, y); + sums = _mm512_fmadd_ps(differences, differences, sums); + } + else + { + __m512 x = _mm512_loadu_pd(data_x + i_x); + __m512 y = _mm512_loadu_pd(data_y + i_y); + __m512 differences = _mm512_sub_pd(x, y); + sums = _mm512_fmadd_pd(differences, differences, sums); + } + } + + if constexpr (std::is_same_v) + state.sum = _mm512_reduce_add_ps(sums); + else + state.sum = _mm512_reduce_add_pd(sums); + } +#endif + template static ResultType finalize(const State & state, const ConstParams &) { @@ -189,6 +237,70 @@ struct CosineDistance state.y_squared += other_state.y_squared; } +#if USE_MULTITARGET_CODE + template + AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine( + const ResultType * __restrict data_x, + const ResultType * __restrict data_y, + size_t i_max, + size_t & i_x, + size_t & i_y, + State & state) + { + __m512 dot_products; + __m512 x_squareds; + __m512 y_squareds; + + if constexpr (std::is_same_v) + { + dot_products = _mm512_setzero_ps(); + x_squareds = _mm512_setzero_ps(); + y_squareds = _mm512_setzero_ps(); + } + else + { + dot_products = _mm512_setzero_pd(); + x_squareds = _mm512_setzero_pd(); + y_squareds = _mm512_setzero_pd(); + } + + const size_t n = (std::is_same_v) ? 16 : 8; + + for (; i_x + n < i_max; i_x += n, i_y += n) + { + if constexpr (std::is_same_v) + { + __m512 x = _mm512_loadu_ps(data_x + i_x); + __m512 y = _mm512_loadu_ps(data_y + i_y); + dot_products = _mm512_fmadd_ps(x, y, dot_products); + x_squareds = _mm512_fmadd_ps(x, x, x_squareds); + y_squareds = _mm512_fmadd_ps(y, y, y_squareds); + } + else + { + __m512 x = _mm512_loadu_pd(data_x + i_x); + __m512 y = _mm512_loadu_pd(data_y + i_y); + dot_products = _mm512_fmadd_pd(x, y, dot_products); + x_squareds = _mm512_fmadd_pd(x, x, x_squareds); + y_squareds = _mm512_fmadd_pd(y, y, y_squareds); + } + } + + if constexpr (std::is_same_v) + { + state.dot_prod = _mm512_reduce_add_ps(dot_products); + state.x_squared = _mm512_reduce_add_ps(x_squareds); + state.y_squared = _mm512_reduce_add_ps(y_squareds); + } + else + { + state.dot_prod = _mm512_reduce_add_pd(dot_products); + state.x_squared = _mm512_reduce_add_pd(x_squareds); + state.y_squared = _mm512_reduce_add_pd(y_squareds); + } + } +#endif + template static ResultType finalize(const State & state, const ConstParams &) { @@ -352,7 +464,7 @@ private: /// Check that arrays in both columns are the sames size for (size_t row = 0; row < offsets_x.size(); ++row) { - if (unlikely(offsets_x[row] != offsets_y[row])) + if (offsets_x[row] != offsets_y[row]) [[unlikely]] { ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0; throw Exception( @@ -420,7 +532,7 @@ private: ColumnArray::Offset prev_offset = 0; for (size_t row : collections::range(0, offsets_y.size())) { - if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) + if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]] { throw Exception( ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, @@ -438,14 +550,35 @@ private: auto & result_data = result->getData(); /// Do the actual computation - ColumnArray::Offset prev = 0; + size_t prev = 0; size_t row = 0; + for (auto off : offsets_y) { + size_t i = 0; + typename Kernel::template State state; + + /// SIMD optimization: process multiple elements in both input arrays at once. 
+ /// To avoid combinatorial explosion of SIMD kernels, focus on + /// - the two most common input/output types (Float32 x Float32) --> Float32 and (Float64 x Float64) --> Float64 instead of 10 x + /// 10 input types x 2 output types, + /// - const/non-const inputs instead of non-const/non-const inputs + /// - the two most common metrics L2 and cosine distance, + /// - the most powerful SIMD instruction set (AVX-512F). +#if USE_MULTITARGET_CODE + if constexpr (std::is_same_v && std::is_same_v) /// ResultType is Float32 or Float64 + { + if constexpr (std::is_same_v + || std::is_same_v) + { + if (isArchSupported(TargetArch::AVX512F)) + Kernel::template accumulateCombine(data_x.data(), data_y.data(), i + offsets_x[0], i, prev, state); + } + } +#else /// Process chunks in vectorized manner static constexpr size_t VEC_SIZE = 4; typename Kernel::template State states[VEC_SIZE]; - size_t i = 0; for (; prev + VEC_SIZE < off; i += VEC_SIZE, prev += VEC_SIZE) { for (size_t s = 0; s < VEC_SIZE; ++s) @@ -453,10 +586,9 @@ private: states[s], static_cast(data_x[i + s]), static_cast(data_y[prev + s]), kernel_params); } - typename Kernel::template State state; for (const auto & other_state : states) Kernel::template combine(state, other_state, kernel_params); - +#endif /// Process the tail for (; prev < off; ++i, ++prev) { @@ -466,6 +598,7 @@ private: result_data[row] = Kernel::finalize(state, kernel_params); row++; } + return result; } diff --git a/tests/queries/0_stateless/02282_array_distance.reference b/tests/queries/0_stateless/02282_array_distance.reference index 9758da9a833..c21e294cb62 100644 --- a/tests/queries/0_stateless/02282_array_distance.reference +++ b/tests/queries/0_stateless/02282_array_distance.reference @@ -80,3 +80,7 @@ nan 5 6 268 2 10.234459893824097 23.15167380558045 536 0.00007815428961455151 6 5 268 2 10.234459893824097 23.15167380558045 536 0.00007815428961455151 6 6 0 0 0 0 0 0 +5.8309517 +0.0003244877 +5.830951894845301 +0.0003245172890904424 diff --git a/tests/queries/0_stateless/02282_array_distance.sql b/tests/queries/0_stateless/02282_array_distance.sql index 9c16071dc1f..2cca853fd67 100644 --- a/tests/queries/0_stateless/02282_array_distance.sql +++ b/tests/queries/0_stateless/02282_array_distance.sql @@ -12,10 +12,10 @@ SELECT cosineDistance([1, 2, 3], [0, 0, 0]); -- Overflows WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -573412550, -552499284, 2096941042], 'Array(Int32)') AS a SELECT - L1Distance(a,a), - L2Distance(a,a), - L2SquaredDistance(a,a), - LinfDistance(a,a), + L1Distance(a, a), + L2Distance(a, a), + L2SquaredDistance(a, a), + LinfDistance(a, a), cosineDistance(a, a); DROP TABLE IF EXISTS vec1; @@ -88,15 +88,33 @@ SELECT FROM vec2f v1, vec2d v2 WHERE length(v1.v) == length(v2.v); -SELECT L1Distance([0, 0], [1]); -- { serverError 190 } -SELECT L2Distance([1, 2], (3,4)); -- { serverError 43 } -SELECT L2SquaredDistance([1, 2], (3,4)); -- { serverError 43 } -SELECT LpDistance([1, 2], [3,4]); -- { serverError 42 } -SELECT LpDistance([1, 2], [3,4], -1.); -- { serverError 69 } -SELECT LpDistance([1, 2], [3,4], 'aaa'); -- { serverError 43 } -SELECT LpDistance([1, 2], [3,4], materialize(2.7)); -- { serverError 44 } +SELECT L1Distance([0, 0], [1]); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } +SELECT L2Distance([1, 2], (3,4)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT L2SquaredDistance([1, 2], (3,4)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT LpDistance([1, 2], [3,4]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } 
+SELECT LpDistance([1, 2], [3,4], -1.); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT LpDistance([1, 2], [3,4], 'aaa'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT LpDistance([1, 2], [3,4], materialize(2.7)); -- { serverError ILLEGAL_COLUMN } DROP TABLE vec1; DROP TABLE vec2; DROP TABLE vec2f; DROP TABLE vec2d; + +-- Queries which trigger manually vectorized implementation + +SELECT L2Distance( + [toFloat32(0.0), toFloat32(1.0), toFloat32(2.0), toFloat32(3.0), toFloat32(4.0), toFloat32(5.0), toFloat32(6.0), toFloat32(7.0), toFloat32(8.0), toFloat32(9.0), toFloat32(10.0), toFloat32(11.0), toFloat32(12.0), toFloat32(13.0), toFloat32(14.0), toFloat32(15.0), toFloat32(16.0), toFloat32(17.0), toFloat32(18.0), toFloat32(19.0), toFloat32(20.0), toFloat32(21.0), toFloat32(22.0), toFloat32(23.0), toFloat32(24.0), toFloat32(25.0), toFloat32(26.0), toFloat32(27.0), toFloat32(28.0), toFloat32(29.0), toFloat32(30.0), toFloat32(31.0), toFloat32(32.0), toFloat32(33.0)], + materialize([toFloat32(1.0), toFloat32(2.0), toFloat32(3.0), toFloat32(4.0), toFloat32(5.0), toFloat32(6.0), toFloat32(7.0), toFloat32(8.0), toFloat32(9.0), toFloat32(10.0), toFloat32(11.0), toFloat32(12.0), toFloat32(13.0), toFloat32(14.0), toFloat32(15.0), toFloat32(16.0), toFloat32(17.0), toFloat32(18.0), toFloat32(19.0), toFloat32(20.0), toFloat32(21.0), toFloat32(22.0), toFloat32(23.0), toFloat32(24.0), toFloat32(25.0), toFloat32(26.0), toFloat32(27.0), toFloat32(28.0), toFloat32(29.0), toFloat32(30.0), toFloat32(31.0), toFloat32(32.0), toFloat32(33.0), toFloat32(34.0)])); + +SELECT cosineDistance( + [toFloat32(0.0), toFloat32(1.0), toFloat32(2.0), toFloat32(3.0), toFloat32(4.0), toFloat32(5.0), toFloat32(6.0), toFloat32(7.0), toFloat32(8.0), toFloat32(9.0), toFloat32(10.0), toFloat32(11.0), toFloat32(12.0), toFloat32(13.0), toFloat32(14.0), toFloat32(15.0), toFloat32(16.0), toFloat32(17.0), toFloat32(18.0), toFloat32(19.0), toFloat32(20.0), toFloat32(21.0), toFloat32(22.0), toFloat32(23.0), toFloat32(24.0), toFloat32(25.0), toFloat32(26.0), toFloat32(27.0), toFloat32(28.0), toFloat32(29.0), toFloat32(30.0), toFloat32(31.0), toFloat32(32.0), toFloat32(33.0)], + materialize([toFloat32(1.0), toFloat32(2.0), toFloat32(3.0), toFloat32(4.0), toFloat32(5.0), toFloat32(6.0), toFloat32(7.0), toFloat32(8.0), toFloat32(9.0), toFloat32(10.0), toFloat32(11.0), toFloat32(12.0), toFloat32(13.0), toFloat32(14.0), toFloat32(15.0), toFloat32(16.0), toFloat32(17.0), toFloat32(18.0), toFloat32(19.0), toFloat32(20.0), toFloat32(21.0), toFloat32(22.0), toFloat32(23.0), toFloat32(24.0), toFloat32(25.0), toFloat32(26.0), toFloat32(27.0), toFloat32(28.0), toFloat32(29.0), toFloat32(30.0), toFloat32(31.0), toFloat32(32.0), toFloat32(33.0), toFloat32(34.0)])); + +SELECT L2Distance( + [toFloat64(0.0), toFloat64(1.0), toFloat64(2.0), toFloat64(3.0), toFloat64(4.0), toFloat64(5.0), toFloat64(6.0), toFloat64(7.0), toFloat64(8.0), toFloat64(9.0), toFloat64(10.0), toFloat64(11.0), toFloat64(12.0), toFloat64(13.0), toFloat64(14.0), toFloat64(15.0), toFloat64(16.0), toFloat64(17.0), toFloat64(18.0), toFloat64(19.0), toFloat64(20.0), toFloat64(21.0), toFloat64(22.0), toFloat64(23.0), toFloat64(24.0), toFloat64(25.0), toFloat64(26.0), toFloat64(27.0), toFloat64(28.0), toFloat64(29.0), toFloat64(30.0), toFloat64(31.0), toFloat64(32.0), toFloat64(33.0)], + materialize([toFloat64(1.0), toFloat64(2.0), toFloat64(3.0), toFloat64(4.0), toFloat64(5.0), toFloat64(6.0), toFloat64(7.0), toFloat64(8.0), toFloat64(9.0), toFloat64(10.0), toFloat64(11.0), 
toFloat64(12.0), toFloat64(13.0), toFloat64(14.0), toFloat64(15.0), toFloat64(16.0), toFloat64(17.0), toFloat64(18.0), toFloat64(19.0), toFloat64(20.0), toFloat64(21.0), toFloat64(22.0), toFloat64(23.0), toFloat64(24.0), toFloat64(25.0), toFloat64(26.0), toFloat64(27.0), toFloat64(28.0), toFloat64(29.0), toFloat64(30.0), toFloat64(31.0), toFloat64(32.0), toFloat64(33.0), toFloat64(34.0)])); + +SELECT cosineDistance( + [toFloat64(0.0), toFloat64(1.0), toFloat64(2.0), toFloat64(3.0), toFloat64(4.0), toFloat64(5.0), toFloat64(6.0), toFloat64(7.0), toFloat64(8.0), toFloat64(9.0), toFloat64(10.0), toFloat64(11.0), toFloat64(12.0), toFloat64(13.0), toFloat64(14.0), toFloat64(15.0), toFloat64(16.0), toFloat64(17.0), toFloat64(18.0), toFloat64(19.0), toFloat64(20.0), toFloat64(21.0), toFloat64(22.0), toFloat64(23.0), toFloat64(24.0), toFloat64(25.0), toFloat64(26.0), toFloat64(27.0), toFloat64(28.0), toFloat64(29.0), toFloat64(30.0), toFloat64(31.0), toFloat64(32.0), toFloat64(33.0)], + materialize([toFloat64(1.0), toFloat64(2.0), toFloat64(3.0), toFloat64(4.0), toFloat64(5.0), toFloat64(6.0), toFloat64(7.0), toFloat64(8.0), toFloat64(9.0), toFloat64(10.0), toFloat64(11.0), toFloat64(12.0), toFloat64(13.0), toFloat64(14.0), toFloat64(15.0), toFloat64(16.0), toFloat64(17.0), toFloat64(18.0), toFloat64(19.0), toFloat64(20.0), toFloat64(21.0), toFloat64(22.0), toFloat64(23.0), toFloat64(24.0), toFloat64(25.0), toFloat64(26.0), toFloat64(27.0), toFloat64(28.0), toFloat64(29.0), toFloat64(30.0), toFloat64(31.0), toFloat64(32.0), toFloat64(33.0), toFloat64(34.0)])); From 68d0f4e42161713f3b54de2069d894b1f84ed833 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 20 Jan 2024 21:36:25 +0000 Subject: [PATCH 114/884] (Futile) unrolling attempt at vectorization --- src/Functions/array/arrayDistance.cpp | 88 ++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 670442c0c79..aa13ee01d9a 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -90,36 +90,92 @@ struct L2Distance size_t & i_y, State & state) { - __m512 sums; - if constexpr (std::is_same_v) - sums = _mm512_setzero_ps(); - else - sums = _mm512_setzero_pd(); + __m512 sums1; + __m512 sums2; + __m512 sums3; + __m512 sums4; - const size_t n = (std::is_same_v) ? 16 : 8; + if constexpr (std::is_same_v) + { + sums1 = _mm512_setzero_ps(); + sums2 = _mm512_setzero_ps(); + sums3 = _mm512_setzero_ps(); + sums4 = _mm512_setzero_ps(); + } + else + { + sums1 = _mm512_setzero_pd(); + sums2 = _mm512_setzero_pd(); + sums3 = _mm512_setzero_pd(); + sums4 = _mm512_setzero_pd(); + } + + const size_t n = (std::is_same_v) ? 
64 : 32; for (; i_x + n < i_max; i_x += n, i_y += n) { if constexpr (std::is_same_v) { - __m512 x = _mm512_loadu_ps(data_x + i_x); - __m512 y = _mm512_loadu_ps(data_y + i_y); - __m512 differences = _mm512_sub_ps(x, y); - sums = _mm512_fmadd_ps(differences, differences, sums); + __m512 x1 = _mm512_loadu_ps(data_x + i_x); + __m512 y1 = _mm512_loadu_ps(data_y + i_y); + __m512 diff1 = _mm512_sub_ps(x1, y1); + sums1 = _mm512_fmadd_ps(diff1, diff1, sums1); + + __m512 x2 = _mm512_loadu_ps(data_x + i_x + 16); + __m512 y2 = _mm512_loadu_ps(data_y + i_y + 16); + __m512 diff2 = _mm512_sub_ps(x2, y2); + sums2 = _mm512_fmadd_ps(diff2, diff2, sums2); + + __m512 x3 = _mm512_loadu_ps(data_x + i_x + 32); + __m512 y3 = _mm512_loadu_ps(data_y + i_y + 32); + __m512 diff3 = _mm512_sub_ps(x3, y3); + sums3 = _mm512_fmadd_ps(diff3, diff3, sums3); + + __m512 x4 = _mm512_loadu_ps(data_x + i_x + 48); + __m512 y4 = _mm512_loadu_ps(data_y + i_y + 48); + __m512 diff4 = _mm512_sub_ps(x4, y4); + sums4 = _mm512_fmadd_ps(diff4, diff4, sums4); } else { - __m512 x = _mm512_loadu_pd(data_x + i_x); - __m512 y = _mm512_loadu_pd(data_y + i_y); - __m512 differences = _mm512_sub_pd(x, y); - sums = _mm512_fmadd_pd(differences, differences, sums); + __m512 x1 = _mm512_loadu_pd(data_x + i_x); + __m512 y1 = _mm512_loadu_pd(data_y + i_y); + __m512 diff1 = _mm512_sub_pd(x1, y1); + sums1 = _mm512_fmadd_pd(diff1, diff1, sums1); + + __m512 x2 = _mm512_loadu_pd(data_x + i_x + 8); + __m512 y2 = _mm512_loadu_pd(data_y + i_y + 8); + __m512 diff2 = _mm512_sub_pd(x2, y2); + sums2 = _mm512_fmadd_pd(diff2, diff2, sums2); + + __m512 x3 = _mm512_loadu_pd(data_x + i_x + 16); + __m512 y3 = _mm512_loadu_pd(data_y + i_y + 16); + __m512 diff3 = _mm512_sub_pd(x3, y3); + sums3 = _mm512_fmadd_pd(diff3, diff3, sums3); + + __m512 x4 = _mm512_loadu_pd(data_x + i_x + 24); + __m512 y4 = _mm512_loadu_pd(data_y + i_y + 24); + __m512 diff4 = _mm512_sub_pd(x4, y4); + sums4 = _mm512_fmadd_pd(diff4, diff4, sums4); } } if constexpr (std::is_same_v) - state.sum = _mm512_reduce_add_ps(sums); + { + Float32 sum1 = _mm512_reduce_add_ps(sums1); + Float32 sum2 = _mm512_reduce_add_ps(sums2); + Float32 sum3 = _mm512_reduce_add_ps(sums3); + Float32 sum4 = _mm512_reduce_add_ps(sums4); + state.sum = sum1 + sum2 + sum3 + sum4; + } else - state.sum = _mm512_reduce_add_pd(sums); + { + Float64 sum1 = _mm512_reduce_add_pd(sums1); + Float64 sum2 = _mm512_reduce_add_pd(sums2); + Float64 sum3 = _mm512_reduce_add_pd(sums3); + Float64 sum4 = _mm512_reduce_add_pd(sums4); + state.sum = sum1 + sum2 + sum3 + sum4; + } } #endif From 68fc97089ec22d29b5d25df4e3865a22cf9701db Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 20 Jan 2024 21:50:13 +0000 Subject: [PATCH 115/884] Revert "(Futile) unrolling attempt at vectorization" This reverts commit df30a990545eafdf5e6a09034d81a97fb0188ba0. 
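
For reference, the hand-written AVX-512 kernels introduced and tuned in the three commits above (initial vectorization, the 4x unrolling attempt, and its revert) accumulate the same quantities as the minimal scalar sketch below. This is illustrative only and not taken from any of the patches: the function names are made up, and the Kernel::State / finalize() plumbing from arrayDistance.cpp is omitted.

    #include <cmath>
    #include <cstddef>

    // One-lane analogue of the _mm512_sub/_mm512_fmadd accumulation in L2Distance;
    // the real kernel sums into State and (presumably) takes the square root in finalize().
    template <typename Float>
    Float l2DistanceScalar(const Float * x, const Float * y, size_t n)
    {
        Float sum = 0;
        for (size_t i = 0; i < n; ++i)
        {
            Float d = x[i] - y[i];
            sum += d * d;
        }
        return std::sqrt(sum);
    }

    // Scalar analogue of the three fmadd accumulators in CosineDistance
    // (dot product plus the two squared norms), combined the way finalize() is
    // expected to combine them.
    template <typename Float>
    Float cosineDistanceScalar(const Float * x, const Float * y, size_t n)
    {
        Float dot = 0, x2 = 0, y2 = 0;
        for (size_t i = 0; i < n; ++i)
        {
            dot += x[i] * y[i];
            x2  += x[i] * x[i];
            y2  += y[i] * y[i];
        }
        return 1 - dot / std::sqrt(x2 * y2);
    }

A 512-bit register holds 16 Float32 or 8 Float64 lanes, which is why the kernels advance by n = 16 or n = 8 per iteration; any elements left over once i_x + n >= i_max fall through to the scalar tail loop that follows the #if/#else block. The 34-element arrays in the new 02282_array_distance.sql queries appear to be chosen so that both paths are exercised: for Float32, elements 0-15 and 16-31 go through two SIMD iterations and elements 32-33 through the tail.
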
--- src/Functions/array/arrayDistance.cpp | 84 +++++---------------------- 1 file changed, 14 insertions(+), 70 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index aa13ee01d9a..670442c0c79 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -90,92 +90,36 @@ struct L2Distance size_t & i_y, State & state) { - __m512 sums1; - __m512 sums2; - __m512 sums3; - __m512 sums4; - + __m512 sums; if constexpr (std::is_same_v) - { - sums1 = _mm512_setzero_ps(); - sums2 = _mm512_setzero_ps(); - sums3 = _mm512_setzero_ps(); - sums4 = _mm512_setzero_ps(); - } + sums = _mm512_setzero_ps(); else - { - sums1 = _mm512_setzero_pd(); - sums2 = _mm512_setzero_pd(); - sums3 = _mm512_setzero_pd(); - sums4 = _mm512_setzero_pd(); - } + sums = _mm512_setzero_pd(); - const size_t n = (std::is_same_v) ? 64 : 32; + const size_t n = (std::is_same_v) ? 16 : 8; for (; i_x + n < i_max; i_x += n, i_y += n) { if constexpr (std::is_same_v) { - __m512 x1 = _mm512_loadu_ps(data_x + i_x); - __m512 y1 = _mm512_loadu_ps(data_y + i_y); - __m512 diff1 = _mm512_sub_ps(x1, y1); - sums1 = _mm512_fmadd_ps(diff1, diff1, sums1); - - __m512 x2 = _mm512_loadu_ps(data_x + i_x + 16); - __m512 y2 = _mm512_loadu_ps(data_y + i_y + 16); - __m512 diff2 = _mm512_sub_ps(x2, y2); - sums2 = _mm512_fmadd_ps(diff2, diff2, sums2); - - __m512 x3 = _mm512_loadu_ps(data_x + i_x + 32); - __m512 y3 = _mm512_loadu_ps(data_y + i_y + 32); - __m512 diff3 = _mm512_sub_ps(x3, y3); - sums3 = _mm512_fmadd_ps(diff3, diff3, sums3); - - __m512 x4 = _mm512_loadu_ps(data_x + i_x + 48); - __m512 y4 = _mm512_loadu_ps(data_y + i_y + 48); - __m512 diff4 = _mm512_sub_ps(x4, y4); - sums4 = _mm512_fmadd_ps(diff4, diff4, sums4); + __m512 x = _mm512_loadu_ps(data_x + i_x); + __m512 y = _mm512_loadu_ps(data_y + i_y); + __m512 differences = _mm512_sub_ps(x, y); + sums = _mm512_fmadd_ps(differences, differences, sums); } else { - __m512 x1 = _mm512_loadu_pd(data_x + i_x); - __m512 y1 = _mm512_loadu_pd(data_y + i_y); - __m512 diff1 = _mm512_sub_pd(x1, y1); - sums1 = _mm512_fmadd_pd(diff1, diff1, sums1); - - __m512 x2 = _mm512_loadu_pd(data_x + i_x + 8); - __m512 y2 = _mm512_loadu_pd(data_y + i_y + 8); - __m512 diff2 = _mm512_sub_pd(x2, y2); - sums2 = _mm512_fmadd_pd(diff2, diff2, sums2); - - __m512 x3 = _mm512_loadu_pd(data_x + i_x + 16); - __m512 y3 = _mm512_loadu_pd(data_y + i_y + 16); - __m512 diff3 = _mm512_sub_pd(x3, y3); - sums3 = _mm512_fmadd_pd(diff3, diff3, sums3); - - __m512 x4 = _mm512_loadu_pd(data_x + i_x + 24); - __m512 y4 = _mm512_loadu_pd(data_y + i_y + 24); - __m512 diff4 = _mm512_sub_pd(x4, y4); - sums4 = _mm512_fmadd_pd(diff4, diff4, sums4); + __m512 x = _mm512_loadu_pd(data_x + i_x); + __m512 y = _mm512_loadu_pd(data_y + i_y); + __m512 differences = _mm512_sub_pd(x, y); + sums = _mm512_fmadd_pd(differences, differences, sums); } } if constexpr (std::is_same_v) - { - Float32 sum1 = _mm512_reduce_add_ps(sums1); - Float32 sum2 = _mm512_reduce_add_ps(sums2); - Float32 sum3 = _mm512_reduce_add_ps(sums3); - Float32 sum4 = _mm512_reduce_add_ps(sums4); - state.sum = sum1 + sum2 + sum3 + sum4; - } + state.sum = _mm512_reduce_add_ps(sums); else - { - Float64 sum1 = _mm512_reduce_add_pd(sums1); - Float64 sum2 = _mm512_reduce_add_pd(sums2); - Float64 sum3 = _mm512_reduce_add_pd(sums3); - Float64 sum4 = _mm512_reduce_add_pd(sums4); - state.sum = sum1 + sum2 + sum3 + sum4; - } + state.sum = _mm512_reduce_add_pd(sums); } #endif From df0c018a9be06e9ccbfb40460f29b155aa86b57f Mon 
Sep 17 00:00:00 2001 From: Hongbin Ma Date: Fri, 12 Jan 2024 16:09:09 +0800 Subject: [PATCH 116/884] support T64 for date32 type --- src/Compression/CompressionCodecT64.cpp | 6 +++++ .../00873_t64_codec_date.reference | 4 +++ .../0_stateless/00873_t64_codec_date.sql | 26 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/queries/0_stateless/00873_t64_codec_date.reference create mode 100644 tests/queries/0_stateless/00873_t64_codec_date.sql diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index bf9a9414bc1..42c6a18aa77 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -91,6 +91,7 @@ enum class MagicNumber : uint8_t Decimal32 = 19, Decimal64 = 20, IPv4 = 21, + Date32 = 22, }; MagicNumber serializeTypeId(std::optional type_id) @@ -109,6 +110,7 @@ MagicNumber serializeTypeId(std::optional type_id) case TypeIndex::Int32: return MagicNumber::Int32; case TypeIndex::Int64: return MagicNumber::Int64; case TypeIndex::Date: return MagicNumber::Date; + case TypeIndex::Date32: return MagicNumber::Date32; case TypeIndex::DateTime: return MagicNumber::DateTime; case TypeIndex::DateTime64: return MagicNumber::DateTime64; case TypeIndex::Enum8: return MagicNumber::Enum8; @@ -137,6 +139,7 @@ TypeIndex deserializeTypeId(uint8_t serialized_type_id) case MagicNumber::Int32: return TypeIndex::Int32; case MagicNumber::Int64: return TypeIndex::Int64; case MagicNumber::Date: return TypeIndex::Date; + case MagicNumber::Date32: return TypeIndex::Date32; case MagicNumber::DateTime: return TypeIndex::DateTime; case MagicNumber::DateTime64: return TypeIndex::DateTime64; case MagicNumber::Enum8: return TypeIndex::Enum8; @@ -177,6 +180,8 @@ TypeIndex baseType(TypeIndex type_idx) case TypeIndex::Enum16: case TypeIndex::Date: return TypeIndex::UInt16; + case TypeIndex::Date32: + return TypeIndex::Int32; case TypeIndex::UInt32: case TypeIndex::DateTime: case TypeIndex::IPv4: @@ -205,6 +210,7 @@ TypeIndex typeIdx(const IDataType * data_type) case TypeIndex::UInt16: case TypeIndex::Enum16: case TypeIndex::Date: + case TypeIndex::Date32: case TypeIndex::Int32: case TypeIndex::UInt32: case TypeIndex::IPv4: diff --git a/tests/queries/0_stateless/00873_t64_codec_date.reference b/tests/queries/0_stateless/00873_t64_codec_date.reference new file mode 100644 index 00000000000..1568c3122e6 --- /dev/null +++ b/tests/queries/0_stateless/00873_t64_codec_date.reference @@ -0,0 +1,4 @@ +1970-01-01 1970-01-01 1950-01-01 1950-01-01 +1970-01-01 1970-01-01 1970-01-01 1970-01-01 +2149-06-06 2149-06-06 2149-06-08 2149-06-08 +2149-06-06 2149-06-06 2149-06-06 2149-06-06 diff --git a/tests/queries/0_stateless/00873_t64_codec_date.sql b/tests/queries/0_stateless/00873_t64_codec_date.sql new file mode 100644 index 00000000000..e9230c75665 --- /dev/null +++ b/tests/queries/0_stateless/00873_t64_codec_date.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS t64; + +CREATE TABLE t64 +( + date16 Date, + t_date16 Date Codec(T64, ZSTD), + date_32 Date32, + t_date32 Date32 Codec(T64, ZSTD) +) ENGINE MergeTree() ORDER BY tuple(); + +INSERT INTO t64 values ('1970-01-01', '1970-01-01', '1970-01-01', '1970-01-01'); +INSERT INTO t64 values ('2149-06-06', '2149-06-06', '2149-06-06', '2149-06-06'); +INSERT INTO t64 values ('2149-06-08', '2149-06-08', '2149-06-08', '2149-06-08'); +INSERT INTO t64 values ('1950-01-01', '1950-01-01', '1950-01-01', '1950-01-01'); + +SELECT * FROM t64 ORDER BY date16; + +SELECT * FROM t64 WHERE date16 != t_date16; 
+SELECT * FROM t64 WHERE date_32 != t_date32; + +OPTIMIZE TABLE t64 FINAL; + +SELECT * FROM t64 WHERE date16 != t_date16; +SELECT * FROM t64 WHERE date_32 != t_date32; + +DROP TABLE t64; From 2e7ce5b0e208c91874d44eb0c828a1e01544a387 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 22 Jan 2024 16:24:43 +0100 Subject: [PATCH 117/884] Updated settings ptr and fetching of client from Disk & ObjectStorage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 32 ++++++++----------- src/Backups/BackupIO_AzureBlobStorage.h | 4 +-- .../AzureBlobStorage/AzureObjectStorage.h | 7 +++- .../Cached/CachedObjectStorage.h | 8 +++++ src/Disks/ObjectStorages/IObjectStorage.h | 13 ++++++++ .../copyAzureBlobStorageFile.cpp | 22 ++++++------- .../copyAzureBlobStorageFile.h | 4 +-- src/Storages/StorageAzureBlob.cpp | 2 +- 8 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 8c6c1040eec..fca324869ae 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -37,13 +37,12 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - settings = StorageAzureBlob::createSettingsAsSharedPtr(context_); - auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), - std::move(settings_as_unique_ptr), + StorageAzureBlob::createSettings(context_), configuration_.container); - client = object_storage->getClient(); + client = object_storage->getAzureBlobStorageClient(); + settings = object_storage->getSettings(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; @@ -89,8 +88,8 @@ std::unique_ptr BackupReaderAzureBlobStorage::readFile(const key = file_name; } return std::make_unique( - client.get(), key, read_settings, settings->max_single_read_retries, - settings->max_single_download_retries); + client.get(), key, read_settings, settings.get()->max_single_read_retries, + settings.get()->max_single_download_retries); } void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup, @@ -98,10 +97,8 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, { LOG_INFO(&Poco::Logger::get("BackupReaderAzureBlobStorage"), "Enter copyFileToDisk"); - /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. - /// We don't check for `has_throttling` here because the native copy almost doesn't use network. 
auto destination_data_source_description = destination_disk->getDataSourceDescription(); - if (destination_data_source_description.sameKind(data_source_description) + if ((destination_data_source_description.type == DataSourceType::AzureBlobStorage) && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); @@ -115,7 +112,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, copyAzureBlobStorageFile( client, - reinterpret_cast(destination_disk->getObjectStorage().get())->getClient(), + destination_disk->getObjectStorage()->getAzureBlobStorageClient(), configuration.container, fs::path(configuration.blob_path) / path_in_backup, 0, @@ -150,13 +147,12 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - settings = StorageAzureBlob::createSettingsAsSharedPtr(context_); - auto settings_as_unique_ptr = StorageAzureBlob::createSettings(context_); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), - std::move(settings_as_unique_ptr), + StorageAzureBlob::createSettings(context_), configuration_.container); - client = object_storage->getClient(); + client = object_storage->getAzureBlobStorageClient(); + settings = object_storage->getSettings(); } void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, @@ -172,7 +168,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu { LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); copyAzureBlobStorageFile( - reinterpret_cast(src_disk->getObjectStorage().get())->getClient(), + src_disk->getObjectStorage()->getAzureBlobStorageClient(), client, /* src_container */ blob_path[1], /* src_path */ blob_path[0], @@ -267,8 +263,8 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String } return std::make_unique( - client.get(), key, read_settings, settings->max_single_read_retries, - settings->max_single_download_retries); + client.get(), key, read_settings, settings.get()->max_single_read_retries, + settings.get()->max_single_download_retries); } std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) @@ -285,7 +281,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin return std::make_unique( client.get(), key, - settings->max_single_part_upload_size, + settings.get()->max_single_part_upload_size, DBMS_DEFAULT_BUFFER_SIZE, write_settings); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 12bf073cd08..87dc470cdb3 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -31,7 +31,7 @@ private: MultiVersion client; StorageAzureBlob::Configuration configuration; std::unique_ptr object_storage; - std::shared_ptr settings; + MultiVersion settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault @@ -60,7 +60,7 @@ private: MultiVersion client; StorageAzureBlob::Configuration configuration; std::unique_ptr object_storage; - std::shared_ptr settings; + MultiVersion settings; }; } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 
52d535054ff..a9d082539e6 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -139,7 +139,12 @@ public: bool isRemote() const override { return true; } - MultiVersion & getClient() { return client; } + MultiVersion & getSettings() { return settings; } + + MultiVersion & getAzureBlobStorageClient() override + { + return client; + } private: const String name; diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 4c185db051d..6b0ff8be58a 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -3,6 +3,7 @@ #include #include #include +#include "config.h" namespace Poco { @@ -118,6 +119,13 @@ public: static bool canUseReadThroughCache(const ReadSettings & settings); +#if USE_AZURE_BLOB_STORAGE + MultiVersion & getAzureBlobStorageClient() override + { + return object_storage->getAzureBlobStorageClient(); + } +#endif + private: FileCache::Key getCacheKey(const std::string & path) const; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index f405be72287..cf113586ddf 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -23,7 +23,12 @@ #include #include #include +#include "config.h" +#if USE_AZURE_BLOB_STORAGE +#include +#include +#endif namespace DB { @@ -212,6 +217,14 @@ public: virtual WriteSettings patchSettings(const WriteSettings & write_settings) const; +#if USE_AZURE_BLOB_STORAGE + virtual MultiVersion & getAzureBlobStorageClient() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); + } +#endif + + private: mutable std::mutex throttlers_mutex; ThrottlerPtr remote_read_throttler; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 272be914cc1..bb8702e9b41 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -49,7 +49,7 @@ namespace size_t total_size_, const String & dest_container_, const String & dest_blob_, - std::shared_ptr settings_, + MultiVersion settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunner schedule_, bool for_disk_azure_blob_storage_, @@ -65,7 +65,7 @@ namespace , schedule(schedule_) , for_disk_azure_blob_storage(for_disk_azure_blob_storage_) , log(log_) - , max_single_part_upload_size(settings_->max_single_part_upload_size) + , max_single_part_upload_size(settings_.get()->max_single_part_upload_size) { } @@ -78,7 +78,7 @@ namespace size_t total_size; const String & dest_container; const String & dest_blob; - std::shared_ptr settings; + MultiVersion settings; const std::optional> & object_metadata; ThreadPoolCallbackRunner schedule; bool for_disk_azure_blob_storage; @@ -114,9 +114,9 @@ namespace if (!total_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. 
This must not happen"); - auto max_part_number = settings->max_part_number; - auto min_upload_part_size = settings->min_upload_part_size; - auto max_upload_part_size = settings->max_upload_part_size; + auto max_part_number = settings.get()->max_part_number; + auto min_upload_part_size = settings.get()->min_upload_part_size; + auto max_upload_part_size = settings.get()->max_upload_part_size; if (!max_part_number) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); @@ -333,7 +333,7 @@ void copyDataToAzureBlobStorageFile( MultiVersion & dest_client, const String & dest_container, const String & dest_blob, - std::shared_ptr settings, + MultiVersion settings, const std::optional> & object_metadata, ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) @@ -352,14 +352,14 @@ void copyAzureBlobStorageFile( size_t size, const String & dest_container, const String & dest_blob, - std::shared_ptr settings, + MultiVersion settings, const ReadSettings & read_settings, const std::optional> & object_metadata, ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) { - if (settings->use_native_copy) + if (settings.get()->use_native_copy) { ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (for_disk_azure_blob_storage) @@ -393,8 +393,8 @@ void copyAzureBlobStorageFile( LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container, src_blob); auto create_read_buffer = [&] { - return std::make_unique(src_client.get(), src_blob, read_settings, settings->max_single_read_retries, - settings->max_single_download_retries); + return std::make_unique(src_client.get(), src_blob, read_settings, settings.get()->max_single_read_retries, + settings.get()->max_single_download_retries); }; UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container, dest_blob, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")}; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index b022151d32d..491f7cd7176 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -29,7 +29,7 @@ void copyAzureBlobStorageFile( size_t src_size, const String & dest_container, const String & dest_blob, - std::shared_ptr settings, + MultiVersion settings, const ReadSettings & read_settings, const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunner schedule_ = {}, @@ -48,7 +48,7 @@ void copyDataToAzureBlobStorageFile( MultiVersion & client, const String & dest_container, const String & dest_blob, - std::shared_ptr settings, + MultiVersion settings, const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_azure_blob_storage = false); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 7a40d2dcb73..e54838c7a61 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1214,7 +1214,7 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() QueryPipelineBuilder builder; std::shared_ptr source; std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files + std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? 
tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; if (num_rows_from_cache) { From 7b235fe643e744b643be6e4d0788de63cae4a07c Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 22 Jan 2024 22:59:59 +0200 Subject: [PATCH 118/884] #31363 - remove schema delimiter setting and add test 00937_format_schema_rows_template.sh and reference --- src/Formats/FormatFactory.cpp | 1 - .../Impl/TemplateBlockOutputFormat.cpp | 15 +++------ ...0937_format_schema_rows_template.reference | 4 +++ .../00937_format_schema_rows_template.sh | 32 +++++++++++++++++++ 4 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/00937_format_schema_rows_template.reference create mode 100755 tests/queries/0_stateless/00937_format_schema_rows_template.sh diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6f7f758621c..184778a9fa9 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -167,7 +167,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.template_settings.row_format = settings.format_template_row; format_settings.template_settings.row_format_schema = settings.format_schema_rows_template; - format_settings.template_settings.row_between_delimiter_schema = settings.format_schema_rows_between_delimiter; format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line; format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.tsv.enum_as_number = settings.input_format_tsv_enum_as_number; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 495cc0e541e..99a7f59c09e 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -221,21 +221,14 @@ void registerOutputFormatTemplate(FormatFactory & factory) }; if (settings.template_settings.row_format.empty()) { - if (settings.template_settings.row_format_schema.empty()) - { - throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template"); - } - else - { - row_format = ParsedTemplateFormatString(); - row_format.parse(settings.template_settings.row_format_schema,idx_by_name); - } + row_format = ParsedTemplateFormatString(); + row_format.parse(settings.template_settings.row_format_schema,idx_by_name); } else { - if (settings.template_settings.row_format_schema.empty()) + if (!settings.template_settings.row_format_schema.empty()) { - throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template"); + throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template, but not both"); } row_format = ParsedTemplateFormatString( FormatSchemaInfo(settings.template_settings.row_format, "Template", false, diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.reference b/tests/queries/0_stateless/00937_format_schema_rows_template.reference new file mode 100644 index 00000000000..167f16ec55f --- /dev/null +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.reference @@ -0,0 +1,4 @@ +Question: 'How awesome is clickhouse?', Answer: 'unbelievably awesome!', Number of Likes: 456, Date: 2016-01-02; +Question: 
'How fast is clickhouse?', Answer: 'Lightning fast!', Number of Likes: 9876543210, Date: 2016-01-03; +Question: 'Is it opensource', Answer: 'of course it is!', Number of Likes: 789, Date: 2016-01-04 + diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.sh b/tests/queries/0_stateless/00937_format_schema_rows_template.sh new file mode 100755 index 00000000000..651e3618f83 --- /dev/null +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2016 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test format_schema_rows_template setting + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE template (question String, answer String, likes UInt64, date Date) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO template VALUES +('How awesome is clickhouse?', 'unbelievably awesome!', 456, '2016-01-02'),\ +('How fast is clickhouse?', 'Lightning fast!', 9876543210, '2016-01-03'),\ +('Is it opensource', 'of course it is!', 789, '2016-01-04')"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ +format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ +format_template_rows_between_delimiter = ';\n'"; + +echo -e "\n" + +# Test that if both format_schema_rows_template setting and format_template_row are provided, error is thrown + +echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Likes: ${likes:Raw}, Date: ${date:Raw}' > "$CURDIR"/00937_template_output_format_row.tmp +$CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ +format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ +format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ +format_template_rows_between_delimiter = ';\n'"; -- { serverError 474 } + +$CLICKHOUSE_CLIENT --query="DROP TABLE template"; +rm "$CURDIR"/00937_template_output_format_row.tmp \ No newline at end of file From 3832a8261a19004e88a32b4bab39f6b46b14daa6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 22 Jan 2024 23:20:02 +0200 Subject: [PATCH 119/884] #31363 - update documentation for En and Ru --- docs/en/interfaces/formats.md | 4 +++- docs/ru/interfaces/formats.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a11c3e5ef19..fd44fbf4462 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -253,7 +253,7 @@ This format is also available under the name `TSVRawWithNamesAndNames`. This format allows specifying a custom format string with placeholders for values with a specified escaping rule. -It uses settings `format_template_resultset`, `format_template_row`, `format_template_rows_between_delimiter` and some settings of other formats (e.g. `output_format_json_quote_64bit_integers` when using `JSON` escaping, see further) +It uses settings `format_template_resultset`, `format_template_row` (`format_schema_rows_template`), `format_template_rows_between_delimiter` and some settings of other formats (e.g. 
`output_format_json_quote_64bit_integers` when using `JSON` escaping, see further) Setting `format_template_row` specifies the path to the file containing format strings for rows with the following syntax: @@ -279,6 +279,8 @@ the values of `SearchPhrase`, `c` and `price` columns, which are escaped as `Quo `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` +In cases where it is challenging or not possible to deploy format output configuration for the template format to a directory on all nodes in a cluster, or if the format is trivial, then `format_schema_rows_template` can be used to pass the template string directly in the query, rather than a path to the file which contains it. + The `format_template_rows_between_delimiter` setting specifies the delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) Setting `format_template_resultset` specifies the path to the file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index b4794b02743..8f8197e2221 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -201,7 +201,7 @@ SELECT * FROM nestedt FORMAT TSV Этот формат позволяет указать произвольную форматную строку, в которую подставляются значения, сериализованные выбранным способом. -Для этого используются настройки `format_template_resultset`, `format_template_row`, `format_template_rows_between_delimiter` и настройки экранирования других форматов (например, `output_format_json_quote_64bit_integers` при экранировании как в `JSON`, см. далее) +Для этого используются настройки `format_template_resultset`, `format_template_row` (`format_schema_rows_template`), `format_template_rows_between_delimiter` и настройки экранирования других форматов (например, `output_format_json_quote_64bit_integers` при экранировании как в `JSON`, см. далее) Настройка `format_template_row` задаёт путь к файлу, содержащему форматную строку для строк таблицы, которая должна иметь вид: @@ -227,6 +227,8 @@ SELECT * FROM nestedt FORMAT TSV `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` +В тех случаях, когда неудобно или невозможно указать произвольную форматную строку в файле, можно использовать `format_schema_rows_template`, чтобы указать произвольную форматную строку в запросе. + Настройка `format_template_rows_between_delimiter` задаёт разделитель между строками, который выводится (или ожидается при вводе) после каждой строки, кроме последней. По умолчанию `\n`. Настройка `format_template_resultset` задаёт путь к файлу, содержащему форматную строку для результата. Форматная строка для результата имеет синтаксис аналогичный форматной строке для строк таблицы и позволяет указать префикс, суффикс и способ вывода дополнительной информации. 
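As a quick illustration of the inline template setting documented in this hunk: the sketch below is not part of the patch; the `events` table and its columns are hypothetical, while the `format_schema_rows_template` / `format_template_rows_between_delimiter` settings and the `${column:EscapingRule}` placeholder syntax are the ones introduced and documented above.

```sql
-- Hypothetical table, used only for illustration.
CREATE TABLE events (message String, likes UInt64) ENGINE = Memory;

-- The row template is passed inline through format_schema_rows_template,
-- so no format_template_row file has to be deployed on the server nodes.
SELECT message, likes
FROM events
ORDER BY likes DESC
FORMAT Template
SETTINGS
    format_schema_rows_template = 'Message: ${message:Quoted}, Likes: ${likes:Raw}',
    format_template_rows_between_delimiter = ';\n';
```

Passing both `format_template_row` and `format_schema_rows_template` in the same query is rejected with an `INVALID_TEMPLATE_FORMAT` error, as enforced by the `TemplateBlockOutputFormat` change earlier in this patch.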
Вместо имён столбцов в ней указываются следующие имена подстановок: From e78eb41264ebb37d3fd813850a3e55ce7690ecea Mon Sep 17 00:00:00 2001 From: MyroTk <44327070+MyroTk@users.noreply.github.com> Date: Mon, 22 Jan 2024 15:19:31 -0800 Subject: [PATCH 120/884] Update Dockerfile --- docker/test/integration/runner/Dockerfile | 57 +++++++++++------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 2a81db78a3d..dbf90f9b810 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -63,47 +63,46 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ RUN python3 -m pip install --no-cache-dir \ - PyMySQL \ - aerospike==11.1.0 \ - asyncio \ + PyMySQL==1.1.0 \ + asyncio==3.4.3 \ avro==1.10.2 \ - azure-storage-blob \ - boto3 \ - cassandra-driver \ + azure-storage-blob==12.19.0 \ + boto3==1.34.24 \ + cassandra-driver==3.29.0 \ confluent-kafka==1.9.2 \ delta-spark==2.3.0 \ - dict2xml \ - dicttoxml \ + dict2xml==1.7.4 \ + dicttoxml==1.7.16 \ docker==6.1.3 \ docker-compose==1.29.2 \ - grpcio \ - grpcio-tools \ - kafka-python \ - kazoo \ - lz4 \ - minio \ - nats-py \ - protobuf \ + grpcio==1.60.0 \ + grpcio-tools==1.60.0 \ + kafka-python==2.0.2 \ + kazoo==2.9.0 \ + lz4==4.3.3 \ + minio==7.2.3 \ + nats-py==2.6.0 \ + protobuf==4.25.2 \ psycopg2-binary==2.9.6 \ - pyhdfs \ + pyhdfs==0.3.1 \ pymongo==3.11.0 \ pyspark==3.3.2 \ - pytest \ + pytest==7.4.4 \ pytest-order==1.0.0 \ - pytest-random \ - pytest-repeat \ - pytest-timeout \ - pytest-xdist \ + pytest-random==0.2 \ + pytest-repeat==0.9.3 \ + pytest-timeout==2.2.0 \ + pytest-xdist==3.5.0 \ pytest-reportlog==0.4.0 \ - pytz \ + pytz==2023.3.post1 \ pyyaml==5.3.1 \ - redis \ - requests-kerberos \ + redis==5.0.1 \ + requests-kerberos==0.14.0 \ tzlocal==2.1 \ - retry \ - bs4 \ - lxml \ - urllib3 + retry==0.9.2 \ + bs4==0.0.2 \ + lxml==5.1.0 \ + urllib3==2.0.7 # bs4, lxml are for cloud tests, do not delete # Hudi supports only spark 3.3.*, not 3.4 From 276ccd3d47be40b79abbaf7734f557d578501b19 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 23 Jan 2024 07:18:14 +0200 Subject: [PATCH 121/884] empty commit to restart CI checks From 992d859e726895dadc9fbab1ebf99acd4b29881c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 23 Jan 2024 14:16:14 +0100 Subject: [PATCH 122/884] Fix style check --- src/Disks/ObjectStorages/IObjectStorage.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index cf113586ddf..b7db353fb6a 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -23,6 +23,7 @@ #include #include #include +#include #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -33,6 +34,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + class ReadBufferFromFileBase; class WriteBufferFromFileBase; From 8e0aea301ee4b416d6bb4bcfdf664756ebff55ec Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 23 Jan 2024 14:29:26 +0000 Subject: [PATCH 123/884] Analyzer: Add cast for ConstantNode from constant folding --- src/Analyzer/ConstantNode.cpp | 5 ++++- tests/analyzer_tech_debt.txt | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index cb05e6ed4e3..69bed3dbe90 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ 
-128,7 +128,10 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const } } - if (need_to_add_cast_function) + // Add cast if constant was created as a result of constant folding. + // Constant folding may lead to type transformation and literal on shard + // may have a different type. + if (need_to_add_cast_function || source_expression != nullptr) { auto constant_type_name_ast = std::make_shared(constant_value->getType()->getName()); return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast)); diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 4643d109c3d..dd747fff7df 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -6,7 +6,6 @@ 01155_rename_move_materialized_view 01214_test_storage_merge_aliases_with_where 01244_optimize_distributed_group_by_sharding_key -01268_shard_avgweighted 01495_subqueries_in_with_statement 01560_merge_distributed_join 01584_distributed_buffer_cannot_find_column From 617cc514b74a610ff1f314f911bfb78c779f0b4b Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 22 Jan 2024 22:55:50 +0000 Subject: [PATCH 124/884] Try to detect file format automatically during schema inference if it's unknown --- docs/en/interfaces/schema-inference.md | 48 +- programs/local/LocalServer.cpp | 2 +- programs/obfuscator/Obfuscator.cpp | 2 +- src/Client/ClientBase.cpp | 2 +- src/Common/ErrorCodes.cpp | 1 + src/Databases/DatabaseFilesystem.cpp | 15 +- src/Formats/FormatFactory.cpp | 86 ++-- src/Formats/FormatFactory.h | 28 +- src/Formats/ReadSchemaUtils.cpp | 423 +++++++++++++----- src/Formats/ReadSchemaUtils.h | 76 +++- src/IO/Archives/IArchiveReader.h | 1 + src/IO/Archives/LibArchiveReader.cpp | 9 + src/IO/Archives/LibArchiveReader.h | 1 + src/IO/Archives/ZipArchiveReader.cpp | 9 + src/IO/Archives/ZipArchiveReader.h | 1 + src/Processors/Formats/ISchemaReader.cpp | 2 +- src/Processors/Formats/ISchemaReader.h | 4 +- .../Impl/JSONColumnsBlockInputFormatBase.cpp | 2 +- .../Impl/JSONColumnsBlockInputFormatBase.h | 2 +- .../Formats/Impl/JSONRowInputFormat.cpp | 39 +- .../Formats/Impl/JSONRowInputFormat.h | 5 +- .../Formats/Impl/TemplateRowInputFormat.cpp | 4 +- .../Formats/Impl/ValuesBlockInputFormat.h | 2 +- src/Server/TCPHandler.cpp | 1 - src/Storages/DataLakes/IStorageDataLake.h | 14 +- .../DataLakes/Iceberg/StorageIceberg.cpp | 4 +- .../DataLakes/Iceberg/StorageIceberg.h | 8 +- src/Storages/HDFS/StorageHDFS.cpp | 142 ++++-- src/Storages/HDFS/StorageHDFS.h | 17 +- src/Storages/HDFS/StorageHDFSCluster.cpp | 22 +- src/Storages/HDFS/StorageHDFSCluster.h | 6 +- src/Storages/IStorageCluster.cpp | 7 +- src/Storages/IStorageCluster.h | 7 +- src/Storages/S3Queue/StorageS3Queue.cpp | 8 +- src/Storages/StorageAzureBlob.cpp | 159 +++++-- src/Storages/StorageAzureBlob.h | 36 +- src/Storages/StorageAzureBlobCluster.cpp | 22 +- src/Storages/StorageAzureBlobCluster.h | 5 +- src/Storages/StorageFile.cpp | 421 +++++++++++------ src/Storages/StorageFile.h | 36 +- src/Storages/StorageFileCluster.cpp | 32 +- src/Storages/StorageFileCluster.h | 8 +- src/Storages/StorageS3.cpp | 191 ++++++-- src/Storages/StorageS3.h | 28 +- src/Storages/StorageS3Cluster.cpp | 31 +- src/Storages/StorageS3Cluster.h | 5 +- src/Storages/StorageURL.cpp | 190 ++++++-- src/Storages/StorageURL.h | 36 +- src/Storages/StorageURLCluster.cpp | 40 +- src/Storages/StorageURLCluster.h | 10 +- src/Storages/StorageXDBC.cpp | 4 +- src/Storages/StorageXDBC.h | 4 +- src/TableFunctions/ITableFunctionCluster.h | 5 +- 
src/TableFunctions/ITableFunctionFileLike.cpp | 36 +- src/TableFunctions/ITableFunctionFileLike.h | 4 +- .../TableFunctionAzureBlobStorage.cpp | 124 +++-- .../TableFunctionAzureBlobStorage.h | 2 +- .../TableFunctionAzureBlobStorageCluster.cpp | 6 +- src/TableFunctions/TableFunctionFile.cpp | 9 +- src/TableFunctions/TableFunctionFile.h | 2 +- .../TableFunctionFileCluster.cpp | 3 +- src/TableFunctions/TableFunctionFormat.cpp | 45 +- src/TableFunctions/TableFunctionHDFS.cpp | 2 + .../TableFunctionHDFSCluster.cpp | 3 +- src/TableFunctions/TableFunctionS3.cpp | 90 +++- src/TableFunctions/TableFunctionS3.h | 2 +- src/TableFunctions/TableFunctionS3Cluster.cpp | 6 +- src/TableFunctions/TableFunctionURL.cpp | 39 +- src/TableFunctions/TableFunctionURL.h | 5 +- .../TableFunctionURLCluster.cpp | 3 +- tests/integration/test_file_cluster/test.py | 88 ++++ tests/integration/test_s3_cluster/test.py | 34 +- .../test_storage_azure_blob_storage/test.py | 70 +++ .../test_cluster.py | 69 +++ tests/integration/test_storage_hdfs/test.py | 68 +++ tests/integration/test_storage_s3/test.py | 54 +++ .../02969_auto_format_detection.reference | 123 +++++ .../02969_auto_format_detection.sh | 46 ++ 78 files changed, 2433 insertions(+), 763 deletions(-) create mode 100644 tests/queries/0_stateless/02969_auto_format_detection.reference create mode 100755 tests/queries/0_stateless/02969_auto_format_detection.sh diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 4db1d53987a..d255688da1f 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -13,7 +13,7 @@ can control it. Schema inference is used when ClickHouse needs to read the data in a specific data format and the structure is unknown. -## Table functions [file](../sql-reference/table-functions/file.md), [s3](../sql-reference/table-functions/s3.md), [url](../sql-reference/table-functions/url.md), [hdfs](../sql-reference/table-functions/hdfs.md). +## Table functions [file](../sql-reference/table-functions/file.md), [s3](../sql-reference/table-functions/s3.md), [url](../sql-reference/table-functions/url.md), [hdfs](../sql-reference/table-functions/hdfs.md), [azureBlobStorage](../sql-reference/table-functions/azureBlobStorage.md). These table functions have the optional argument `structure` with the structure of input data. If this argument is not specified or set to `auto`, the structure will be inferred from the data. @@ -55,7 +55,7 @@ DESCRIBE file('hobbies.jsonl') └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md) +## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md), [azureBlobStorage](./engines/table-engines/integrations/azureBlobStorage.md) If the list of columns is not specified in `CREATE TABLE` query, the structure of the table will be inferred automatically from the data. @@ -1061,7 +1061,7 @@ $$) └──────────────┴───────────────┘ ``` -## Values {#values} +### Values {#values} In Values format ClickHouse extracts column value from the row and then parses it using the recursive parser similar to how literals are parsed. 
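As a small, hedged illustration of the Values-based inference mentioned in the sentence above (not part of the patch itself; the literal data is invented, and the exact inferred types depend on the usual schema-inference defaults):

```sql
-- Each value is parsed like a literal, so types can be inferred without any schema:
-- roughly Nullable(Int64), Nullable(String), Array(Nullable(Int64)) for the columns below.
DESC format(Values, '(1, ''Hello'', [1, 2, 3]), (2, ''World'', [4, 5])');
```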
@@ -1986,3 +1986,45 @@ Note: - As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc) and won't work for other formats (like CSV, TSV, JSONCompactEachRow, etc). - If ClickHouse cannot infer the schema from one of the files, the exception will be thrown. - If you have a lot of files, reading schema from all of them can take a lot of time. + + +## Automatic format detection {#automatic-format-detection} + +If the data format is not specified and cannot be determined by the file extension, ClickHouse will try to detect the file format by its content. + +**Examples:** + +Let's say we have a file `data` with the following content: +`data`: +``` +"a","b" +1,"Data1" +2,"Data2" +3,"Data3" +``` + +We can inspect and query this file without specifying format or structure: +```sql +:) desc file(data); +``` + +```text +┌─name─┬─type─────────────┐ +│ a │ Nullable(Int64) │ +│ b │ Nullable(String) │ +└──────┴──────────────────┘ +``` + +```sql +:) select * from file(data); +``` + +```text +┌─a─┬─b─────┐ +│ 1 │ Data1 │ +│ 2 │ Data2 │ +│ 3 │ Data3 │ +└───┴───────┘ +``` + +**Note:** ClickHouse can detect only a subset of formats, and this detection takes some time, so it's always better to specify the format explicitly. \ No newline at end of file diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 4e0b9eeb731..dd96532aadd 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -348,7 +348,7 @@ std::string LocalServer::getInitialCreateTableQuery() /// Use regular file auto file_name = config().getString("table-file"); table_file = quoteString(file_name); - format_from_file_name = FormatFactory::instance().getFormatFromFileName(file_name, false); + format_from_file_name = FormatFactory::instance().getFormatFromFileName(file_name); } auto data_format = backQuoteIfNeed( diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 7e09d5e8046..242e995e466 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1310,7 +1310,7 @@ try throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)"); SingleReadBufferIterator read_buffer_iterator(std::move(file)); - schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const); + schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const); } else { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index e099aac0de9..01eff0d3e4c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1508,7 +1508,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des String current_format = parsed_insert_query->format; if (current_format.empty()) - current_format = FormatFactory::instance().getFormatFromFileName(in_file, true); + current_format = FormatFactory::instance().getFormatFromFileName(in_file); /// Create temporary storage file, to support globs and parallel reading /// StorageFile doesn't support ephemeral/materialized/alias columns.
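The `ClientBase` change just above keeps deriving the format of `INSERT ... FROM INFILE` from the file name; a minimal sketch of how that looks from the client side (the `events` table and the file names are hypothetical, not part of the patch):

```sql
-- The CSV format is picked from the .csv extension, no FORMAT clause is needed.
INSERT INTO events FROM INFILE 'rows.csv';

-- If the extension does not identify a format, it can still be given explicitly.
INSERT INTO events FROM INFILE 'rows_dump' FORMAT CSV;
```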
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 975970bbeeb..01d1d2c679b 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -592,6 +592,7 @@ M(710, FAULT_INJECTED) \ M(711, FILECACHE_ACCESS_DENIED) \ M(712, TOO_MANY_MATERIALIZED_VIEWS) \ + M(713, CANNOT_DETECT_FORMAT) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 5564f1d07cf..4105236f0ef 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -146,9 +146,18 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont if (!checkTableFilePath(table_path, context_, throw_on_error)) return {}; - auto format = FormatFactory::instance().getFormatFromFileName(table_path, throw_on_error); - if (format.empty()) - return {}; + String format; + if (throw_on_error) + { + format = FormatFactory::instance().getFormatFromFileName(table_path); + } + else + { + auto format_maybe = FormatFactory::instance().tryGetFormatFromFileName(table_path); + if (!format_maybe) + return {}; + format = *format_maybe; + } auto ast_function_ptr = makeASTFunction("file", std::make_shared(table_path), std::make_shared(format)); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 608f9433d6f..cacb5a510da 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -39,7 +39,7 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & name) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); } -FormatSettings getFormatSettings(ContextPtr context) +FormatSettings getFormatSettings(const ContextPtr & context) { const auto & settings = context->getSettingsRef(); @@ -47,7 +47,7 @@ FormatSettings getFormatSettings(ContextPtr context) } template -FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) +FormatSettings getFormatSettings(const ContextPtr & context, const Settings & settings) { FormatSettings format_settings; @@ -253,16 +253,16 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) return format_settings; } -template FormatSettings getFormatSettings(ContextPtr context, const FormatFactorySettings & settings); +template FormatSettings getFormatSettings(const ContextPtr & context, const FormatFactorySettings & settings); -template FormatSettings getFormatSettings(ContextPtr context, const Settings & settings); +template FormatSettings getFormatSettings(const ContextPtr & context, const Settings & settings); InputFormatPtr FormatFactory::getInput( const String & name, ReadBuffer & _buf, const Block & sample, - ContextPtr context, + const ContextPtr & context, UInt64 max_block_size, const std::optional & _format_settings, std::optional _max_parsing_threads, @@ -425,7 +425,7 @@ std::unique_ptr FormatFactory::wrapReadBufferIfNeeded( return res; } -static void addExistingProgressToOutputFormat(OutputFormatPtr format, ContextPtr context) +static void addExistingProgressToOutputFormat(OutputFormatPtr format, const ContextPtr & context) { auto element_id = context->getProcessListElementSafe(); if (element_id) @@ -444,7 +444,7 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( const String & name, WriteBuffer & buf, const Block & sample, - ContextPtr context, + const ContextPtr & context, const std::optional & _format_settings) const { const auto & output_getter = getCreators(name).output_creator; @@ -482,7 
+482,7 @@ OutputFormatPtr FormatFactory::getOutputFormat( const String & name, WriteBuffer & buf, const Block & sample, - ContextPtr context, + const ContextPtr & context, const std::optional & _format_settings) const { const auto & output_getter = getCreators(name).output_creator; @@ -516,7 +516,7 @@ OutputFormatPtr FormatFactory::getOutputFormat( String FormatFactory::getContentType( const String & name, - ContextPtr context, + const ContextPtr & context, const std::optional & _format_settings) const { const auto & output_getter = getCreators(name).output_creator; @@ -535,7 +535,7 @@ String FormatFactory::getContentType( SchemaReaderPtr FormatFactory::getSchemaReader( const String & name, ReadBuffer & buf, - ContextPtr & context, + const ContextPtr & context, const std::optional & _format_settings) const { const auto & schema_reader_creator = dict.at(name).schema_reader_creator; @@ -551,7 +551,7 @@ SchemaReaderPtr FormatFactory::getSchemaReader( ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader( const String & name, - ContextPtr & context, + const ContextPtr & context, const std::optional & _format_settings) const { const auto & external_schema_reader_creator = dict.at(name).external_schema_reader_creator; @@ -605,7 +605,7 @@ void FormatFactory::markFormatHasNoAppendSupport(const String & name) registerAppendSupportChecker(name, [](const FormatSettings &){ return false; }); } -bool FormatFactory::checkIfFormatSupportAppend(const String & name, ContextPtr context, const std::optional & format_settings_) +bool FormatFactory::checkIfFormatSupportAppend(const String & name, const ContextPtr & context, const std::optional & format_settings_) { auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); auto & append_support_checker = dict[name].append_support_checker; @@ -628,10 +628,10 @@ void FormatFactory::registerFileExtension(const String & extension, const String file_extension_formats[boost::to_lower_copy(extension)] = format_name; } -String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_found) +std::optional FormatFactory::tryGetFormatFromFileName(String file_name) { if (file_name == "stdin") - return getFormatFromFileDescriptor(STDIN_FILENO); + return tryGetFormatFromFileDescriptor(STDIN_FILENO); CompressionMethod compression_method = chooseCompressionMethod(file_name, ""); if (CompressionMethod::None != compression_method) @@ -643,43 +643,53 @@ String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_ auto pos = file_name.find_last_of('.'); if (pos == String::npos) - { - if (throw_if_not_found) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by it's extension"); - return ""; - } + return std::nullopt; String file_extension = file_name.substr(pos + 1, String::npos); boost::algorithm::to_lower(file_extension); auto it = file_extension_formats.find(file_extension); if (it == file_extension_formats.end()) - { - if (throw_if_not_found) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by it's extension"); - return ""; - } + return std::nullopt; + return it->second; } -String FormatFactory::getFormatFromFileDescriptor(int fd) +String FormatFactory::getFormatFromFileName(String file_name) +{ + if (auto format = tryGetFormatFromFileName(file_name)) + return *format; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the format of the file {} by it's extension", file_name); +} + +std::optional 
FormatFactory::tryGetFormatFromFileDescriptor(int fd) { #ifdef OS_LINUX std::string proc_path = fmt::format("/proc/self/fd/{}", fd); char file_path[PATH_MAX] = {'\0'}; if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) - return getFormatFromFileName(file_path, false); - return ""; + return tryGetFormatFromFileName(file_path); + return std::nullopt; #elif defined(OS_DARWIN) char file_path[PATH_MAX] = {'\0'}; if (fcntl(fd, F_GETPATH, file_path) != -1) - return getFormatFromFileName(file_path, false); - return ""; + return tryGetFormatFromFileName(file_path); + return std::nullopt; #else (void)fd; - return ""; + return std::nullopt; #endif } +String FormatFactory::getFormatFromFileDescriptor(int fd) +{ + if (auto format = tryGetFormatFromFileDescriptor(fd)) + return *format; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the format of the data by the file descriptor {}", fd); +} + + void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine) { auto & target = dict[name].file_segmentation_engine_creator; @@ -765,7 +775,7 @@ void FormatFactory::registerAdditionalInfoForSchemaCacheGetter( target = std::move(additional_info_for_schema_cache_getter); } -String FormatFactory::getAdditionalInfoForSchemaCache(const String & name, ContextPtr context, const std::optional & format_settings_) +String FormatFactory::getAdditionalInfoForSchemaCache(const String & name, const ContextPtr & context, const std::optional & format_settings_) { const auto & additional_info_getter = getCreators(name).additional_info_for_schema_cache_getter; if (!additional_info_getter) @@ -810,7 +820,7 @@ bool FormatFactory::checkIfOutputFormatPrefersLargeBlocks(const String & name) c return target.prefers_large_blocks; } -bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, ContextPtr context) const +bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, const ContextPtr & context) const { if (name == "Parquet" && context->getSettingsRef().input_format_parquet_preserve_order) return false; @@ -825,6 +835,18 @@ void FormatFactory::checkFormatName(const String & name) const throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); } +std::vector FormatFactory::getAllInputFormats() const +{ + std::vector input_formats; + for (const auto & [format_name, creators] : dict) + { + if (creators.input_creator || creators.random_access_input_creator) + input_formats.push_back(format_name); + } + + return input_formats; +} + FormatFactory & FormatFactory::instance() { static FormatFactory ret; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 9670c690456..165a20f7c4d 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -48,10 +48,10 @@ using RowOutputFormatPtr = std::shared_ptr; template struct Memory; -FormatSettings getFormatSettings(ContextPtr context); +FormatSettings getFormatSettings(const ContextPtr & context); template -FormatSettings getFormatSettings(ContextPtr context, const T & settings); +FormatSettings getFormatSettings(const ContextPtr & context, const T & settings); /** Allows to create an IInputFormat or IOutputFormat by the name of the format. * Note: format and compression are independent things. 
@@ -161,7 +161,7 @@ public: const String & name, ReadBuffer & buf, const Block & sample, - ContextPtr context, + const ContextPtr & context, UInt64 max_block_size, const std::optional & format_settings = std::nullopt, std::optional max_parsing_threads = std::nullopt, @@ -178,30 +178,30 @@ public: const String & name, WriteBuffer & buf, const Block & sample, - ContextPtr context, + const ContextPtr & context, const std::optional & format_settings = std::nullopt) const; OutputFormatPtr getOutputFormat( const String & name, WriteBuffer & buf, const Block & sample, - ContextPtr context, + const ContextPtr & context, const std::optional & _format_settings = std::nullopt) const; String getContentType( const String & name, - ContextPtr context, + const ContextPtr & context, const std::optional & format_settings = std::nullopt) const; SchemaReaderPtr getSchemaReader( const String & name, ReadBuffer & buf, - ContextPtr & context, + const ContextPtr & context, const std::optional & format_settings = std::nullopt) const; ExternalSchemaReaderPtr getExternalSchemaReader( const String & name, - ContextPtr & context, + const ContextPtr & context, const std::optional & format_settings = std::nullopt) const; void registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine); @@ -216,7 +216,7 @@ public: /// registerAppendSupportChecker with append_support_checker that always returns true. void markFormatHasNoAppendSupport(const String & name); - bool checkIfFormatSupportAppend(const String & name, ContextPtr context, const std::optional & format_settings_ = std::nullopt); + bool checkIfFormatSupportAppend(const String & name, const ContextPtr & context, const std::optional & format_settings_ = std::nullopt); /// Register format by its name. void registerInputFormat(const String & name, InputCreator input_creator); @@ -225,8 +225,10 @@ public: /// Register file extension for format void registerFileExtension(const String & extension, const String & format_name); - String getFormatFromFileName(String file_name, bool throw_if_not_found = false); + String getFormatFromFileName(String file_name); + std::optional tryGetFormatFromFileName(String file_name); String getFormatFromFileDescriptor(int fd); + std::optional tryGetFormatFromFileDescriptor(int fd); /// Register schema readers for format its name. 
void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator); @@ -244,16 +246,18 @@ public: bool checkIfFormatHasAnySchemaReader(const String & name) const; bool checkIfOutputFormatPrefersLargeBlocks(const String & name) const; - bool checkParallelizeOutputAfterReading(const String & name, ContextPtr context) const; + bool checkParallelizeOutputAfterReading(const String & name, const ContextPtr & context) const; void registerAdditionalInfoForSchemaCacheGetter(const String & name, AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter); - String getAdditionalInfoForSchemaCache(const String & name, ContextPtr context, const std::optional & format_settings_ = std::nullopt); + String getAdditionalInfoForSchemaCache(const String & name, const ContextPtr & context, const std::optional & format_settings_ = std::nullopt); const FormatsDictionary & getAllFormats() const { return dict; } + std::vector getAllInputFormats() const; + bool isInputFormat(const String & name) const; bool isOutputFormat(const String & name) const; diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 43931be3449..b4fba7b9ce6 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -14,7 +15,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int ONLY_NULLS_WHILE_READING_SCHEMA; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; extern const int TYPE_MISMATCH; + extern const int LOGICAL_ERROR; } static std::optional getOrderedColumnsList(const NamesAndTypesList & columns_list, const Names & columns_order_hint) @@ -43,48 +46,86 @@ bool isRetryableSchemaInferenceError(int code) return code == ErrorCodes::EMPTY_DATA_PASSED || code == ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA; } -ColumnsDescription readSchemaFromFormat( - const String & format_name, +/// Order of formats to try in automatic format detection. +/// If we can successfully detect some format, we won't try next ones. +static const std::vector & getFormatsOrderForDetection() +{ + static const std::vector formats_order = + { + "Parquet", + "ORC", + "Arrow", + "ArrowStream", + "Avro", + "AvroConfluent", + "Npy", + "Native", + "BSONEachRow", + "JSONCompact", + "Values", + "TSKV", + "JSONObjectEachRow", + "JSONColumns", + "JSONCompactColumns", + "JSONCompact", + "JSON", + }; + + return formats_order; +} + +/// The set of similar formats to try in automatic format detection. +/// We will try all formats from this set and then choose the best one +/// according to inferred schema. 
+static const std::vector & getSimilarFormatsSetForDetection() +{ + static const std::vector formats_order = + { + "TSV", + "CSV", + }; + + return formats_order; +} + +std::pair readSchemaFromFormatImpl( + std::optional format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - bool retry, - ContextPtr & context, - std::unique_ptr & buf) + const ContextPtr & context) try { NamesAndTypesList names_and_types; SchemaInferenceMode mode = context->getSettingsRef().schema_inference_mode; - if (mode == SchemaInferenceMode::UNION && !FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings)) + if (format_name && mode == SchemaInferenceMode::UNION && !FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(*format_name, context, format_settings)) { String additional_message; /// Better exception message for WithNames(AndTypes) formats. - if (format_name.ends_with("WithNames") || format_name.ends_with("WithNamesAndTypes")) + if (format_name->ends_with("WithNames") || format_name->ends_with("WithNamesAndTypes")) additional_message = " (formats -WithNames(AndTypes) support reading subset of columns only when setting input_format_with_names_use_header is enabled)"; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION schema inference mode is not supported for format {}, because it doesn't support reading subset of columns{}", format_name, additional_message); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION schema inference mode is not supported for format {}, because it doesn't support reading subset of columns{}", *format_name, additional_message); } - if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name)) + if (format_name && FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format_name)) { - auto external_schema_reader = FormatFactory::instance().getExternalSchemaReader(format_name, context, format_settings); + auto external_schema_reader = FormatFactory::instance().getExternalSchemaReader(*format_name, context, format_settings); try { - names_and_types = external_schema_reader->readSchema(); + return {ColumnsDescription(external_schema_reader->readSchema()), *format_name}; } catch (Exception & e) { e.addMessage( - fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name)); + fmt::format("The table structure cannot be extracted from a {} format file. You can specify the structure manually", *format_name)); throw; } } - else if (FormatFactory::instance().checkIfFormatHasSchemaReader(format_name)) - { - if (mode == SchemaInferenceMode::UNION) - retry = false; + if (!format_name || FormatFactory::instance().checkIfFormatHasSchemaReader(*format_name)) + { + IReadBufferIterator::Data iterator_data; std::vector> schemas_for_union_mode; - std::optional cached_columns; std::string exception_messages; SchemaReaderPtr schema_reader; size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference @@ -94,45 +135,71 @@ try size_t iterations = 0; while (true) { + /// When we finish working with current buffer we should put it back to iterator. 
+ SCOPE_EXIT(if (iterator_data.buf) read_buffer_iterator.setPreviousReadBuffer(std::move(iterator_data.buf))); bool is_eof = false; try { - read_buffer_iterator.setPreviousReadBuffer(std::move(buf)); - std::tie(buf, cached_columns) = read_buffer_iterator.next(); - if (cached_columns) + iterator_data = read_buffer_iterator.next(); + + /// Read buffer iterator can determine the data format if it's unknown. + /// For example by scanning schema cache or by finding new file with format extension. + if (!format_name && iterator_data.format_name) { + format_name = *iterator_data.format_name; + read_buffer_iterator.setFormatName(*iterator_data.format_name); + } + + if (iterator_data.cached_columns) + { + /// If we have schema in cache, we must also know the format. + if (!format_name) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Schema from cache was returned, but format name is unknown"); + if (mode == SchemaInferenceMode::DEFAULT) - return *cached_columns; - schemas_for_union_mode.emplace_back(cached_columns->getAll(), read_buffer_iterator.getLastFileName()); + { + read_buffer_iterator.setResultingSchema(*iterator_data.cached_columns); + return {*iterator_data.cached_columns, *format_name}; + } + + schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFileName()); continue; } - if (!buf) + if (!iterator_data.buf) break; /// We just want to check for eof, but eof() can be pretty expensive. /// So we use getFileSize() when available, which has better worst case. /// (For remote files, typically eof() would read 1 MB from S3, which may be much /// more than what the schema reader and even data reader will read). - auto size = tryGetFileSizeFromReadBuffer(*buf); + auto size = tryGetFileSizeFromReadBuffer(*iterator_data.buf); if (size.has_value()) is_eof = *size == 0; else - is_eof = buf->eof(); + is_eof = iterator_data.buf->eof(); } catch (Exception & e) { - e.addMessage( - fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name)); + if (format_name) + e.addMessage(fmt::format("The table structure cannot be extracted from a {} format file. You can specify the structure manually", *format_name)); + else + e.addMessage("The data format cannot be detected by the contents of the files. You can specify the format manually"); throw; } catch (...) 
{ auto exception_message = getCurrentExceptionMessage(false); + if (format_name) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file:\n{}\nYou can specify the structure manually", + *format_name, + exception_message); + throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file:\n{}\nYou can specify the structure manually", - format_name, + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files:\n{}\nYou can specify the format manually", exception_message); } @@ -140,91 +207,218 @@ try if (is_eof) { - auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is empty", format_name); + String exception_message; + if (format_name) + exception_message = fmt::format("The table structure cannot be extracted from a {} format file: the file is empty", *format_name); + else + exception_message = fmt::format("The data format cannot be detected by the contents of the files: the file is empty"); - if (!retry) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "{}. You can specify the structure manually", exception_message); + if (mode == SchemaInferenceMode::UNION) + { + if (!format_name) + throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files: the file is empty. You can specify the format manually"); - exception_messages += "\n" + exception_message; + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "{}. You can specify the structure manually", exception_message); + } + + if (!exception_messages.empty()) + exception_messages += "\n"; + exception_messages += exception_message; continue; } - try + if (format_name) { - schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings); - schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); - names_and_types = schema_reader->readSchema(); - auto num_rows = schema_reader->readNumberOrRows(); - if (num_rows) - read_buffer_iterator.setNumRowsToLastFile(*num_rows); - - /// In default mode, we finish when schema is inferred successfully from any file. - if (mode == SchemaInferenceMode::DEFAULT) - break; - - if (!names_and_types.empty()) - read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); - schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); - } - catch (...) - { - auto exception_message = getCurrentExceptionMessage(false); - if (schema_reader && mode == SchemaInferenceMode::DEFAULT) + try { - size_t rows_read = schema_reader->getNumRowsRead(); - assert(rows_read <= max_rows_to_read); - max_rows_to_read -= schema_reader->getNumRowsRead(); - size_t bytes_read = buf->count(); - /// We could exceed max_bytes_to_read a bit to complete row parsing. 
- max_bytes_to_read -= std::min(bytes_read, max_bytes_to_read); - if (rows_read != 0 && (max_rows_to_read == 0 || max_bytes_to_read == 0)) - { - exception_message += "\nTo increase the maximum number of rows/bytes to read for structure determination, use setting " - "input_format_max_rows_to_read_for_schema_inference/input_format_max_bytes_to_read_for_schema_inference"; + schema_reader = FormatFactory::instance().getSchemaReader(*format_name, *iterator_data.buf, context, format_settings); + schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); + names_and_types = schema_reader->readSchema(); + auto num_rows = schema_reader->readNumberOrRows(); + if (num_rows) + read_buffer_iterator.setNumRowsToLastFile(*num_rows); - if (iterations > 1) + /// In default mode, we finish when schema is inferred successfully from any file. + if (mode == SchemaInferenceMode::DEFAULT) + break; + + if (!names_and_types.empty()) + read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + } + catch (...) + { + auto exception_message = getCurrentExceptionMessage(false); + if (schema_reader && mode == SchemaInferenceMode::DEFAULT) + { + size_t rows_read = schema_reader->getNumRowsRead(); + assert(rows_read <= max_rows_to_read); + max_rows_to_read -= schema_reader->getNumRowsRead(); + size_t bytes_read = iterator_data.buf->count(); + /// We could exceed max_bytes_to_read a bit to complete row parsing. + max_bytes_to_read -= std::min(bytes_read, max_bytes_to_read); + if (rows_read != 0 && (max_rows_to_read == 0 || max_bytes_to_read == 0)) { - exception_messages += "\n" + exception_message; + exception_message + += "\nTo increase the maximum number of rows/bytes to read for structure determination, use setting " + "input_format_max_rows_to_read_for_schema_inference/input_format_max_bytes_to_read_for_schema_inference"; + if (!exception_messages.empty()) + exception_messages += "\n"; + exception_messages += exception_message; break; } - retry = false; } - } - if (!retry || !isRetryableSchemaInferenceError(getCurrentExceptionCode())) - { - try - { - throw; - } - catch (Exception & e) - { - e.addMessage(fmt::format( - "Cannot extract table structure from {} format file. You can specify the structure manually", format_name)); - throw; - } - catch (...) + if (mode == SchemaInferenceMode::UNION || !isRetryableSchemaInferenceError(getCurrentExceptionCode())) { throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file. " + "The table structure cannot be extracted from a {} format file. " "Error: {}. You can specify the structure manually", - format_name, + *format_name, exception_message); } + + if (!exception_messages.empty()) + exception_messages += "\n"; + exception_messages += exception_message; + } + } + else + { + /// If the format is unknown we try some formats in order and try to apply their schema readers. + /// If we can successfully infer the schema in some format, most likely we can use this format to read this data. + + /// If read_buffer_iterator supports recreation of last buffer, we will recreate it for + /// each format. Otherwise we will use PeekableReadBuffer and will rollback to the + /// beginning of the file before each format. 
Using PeekableReadBuffer can lead + /// to high memory usage as it will save all the read data from the beginning of the file, + /// especially it will be noticeable for formats like Parquet/ORC/Arrow that do seeks to the + /// end of file. + std::unique_ptr peekable_buf; + bool support_buf_recreation = read_buffer_iterator.supportsLastReadBufferRecreation(); + if (!support_buf_recreation) + { + peekable_buf = std::make_unique(*iterator_data.buf); + peekable_buf->setCheckpoint(); + } + + /// First, try some formats in order. If we successfully inferred the schema for any format, + /// we will use this format. + for (const auto & format_to_detect : getFormatsOrderForDetection()) + { + try + { + schema_reader = FormatFactory::instance().getSchemaReader(format_to_detect, support_buf_recreation ? *iterator_data.buf : *peekable_buf, context, format_settings); + schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); + names_and_types = schema_reader->readSchema(); + if (names_and_types.empty()) + continue; + + /// We successfully inferred schema from this file using current format. + format_name = format_to_detect; + read_buffer_iterator.setFormatName(format_to_detect); + + auto num_rows = schema_reader->readNumberOrRows(); + if (num_rows) + read_buffer_iterator.setNumRowsToLastFile(*num_rows); + + break; + } + catch (...) + { + /// We failed to infer the schema for this format. + /// Recreate read buffer or rollback to the beginning of the data + /// before trying next format. + if (support_buf_recreation) + { + read_buffer_iterator.setPreviousReadBuffer(std::move(iterator_data.buf)); + iterator_data.buf = read_buffer_iterator.recreateLastReadBuffer(); + } + else + { + peekable_buf->rollbackToCheckpoint(); + } + } } - exception_messages += "\n" + exception_message; + /// If no format was detected from first set of formats, we try second set. + /// In this set formats are similar and it can happen that data matches some of them. + /// We try to infer schema for all of the formats from this set and then choose the best + /// one according to the inferred schema. + if (!format_name) + { + std::unordered_map format_to_schema; + for (const auto & format_to_detect : getSimilarFormatsSetForDetection()) + { + try + { + schema_reader = FormatFactory::instance().getSchemaReader( + format_to_detect, support_buf_recreation ? *iterator_data.buf : *peekable_buf, context, format_settings); + schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); + auto tmp_names_and_types = schema_reader->readSchema(); + /// If schema was inferred successfully for this format, remember it and try next format. + if (!tmp_names_and_types.empty()) + format_to_schema[format_to_detect] = tmp_names_and_types; + } + catch (...) // NOLINT(bugprone-empty-catch) + { + /// Try next format. + } + + if (support_buf_recreation) + { + read_buffer_iterator.setPreviousReadBuffer(std::move(iterator_data.buf)); + iterator_data.buf = read_buffer_iterator.recreateLastReadBuffer(); + } + else + { + peekable_buf->rollbackToCheckpoint(); + } + } + + /// We choose the format with larger number of columns in inferred schema. 
+ size_t max_number_of_columns = 0; + for (const auto & [format_to_detect, schema] : format_to_schema ) + { + if (schema.size() > max_number_of_columns) + { + names_and_types = schema; + format_name = format_to_detect; + max_number_of_columns = schema.size(); + } + } + + if (format_name) + read_buffer_iterator.setFormatName(*format_name); + } + + if (mode == SchemaInferenceMode::UNION) + { + /// For UNION mode we need to know the schema of each file, + /// if we failed to detect the format, we failed to detect the schema of this file + /// in any format. It doesn't make sense to continue. + if (!format_name) + throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. You can specify the format manually"); + + read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + } + + if (format_name && mode == SchemaInferenceMode::DEFAULT) + break; } } + if (!format_name) + throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. You can specify the format manually"); + /// If we got all schemas from cache, schema_reader can be uninitialized. /// But we still need some stateless methods of ISchemaReader, /// let's initialize it with empty buffer. EmptyReadBuffer empty; if (!schema_reader) - schema_reader = FormatFactory::instance().getSchemaReader(format_name, empty, context, format_settings); + schema_reader = FormatFactory::instance().getSchemaReader(*format_name, empty, context, format_settings); if (mode == SchemaInferenceMode::UNION) { @@ -273,11 +467,23 @@ try } if (names_and_types.empty()) + { + if (iterations <= 1) + { + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file. " + "Error: {}. You can specify the structure manually", + *format_name, + exception_messages); + } + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from files failed. " - "Errors:{}\nYou can specify the structure manually", + "Errors:\n{}\nYou can specify the structure manually", exception_messages); + } /// If we have "INSERT SELECT" query then try to order /// columns as they are ordered in table schema for formats @@ -294,22 +500,22 @@ try if (ordered_list) names_and_types = *ordered_list; } + + /// Some formats like CSVWithNames can contain empty column names. We don't support empty column names and further processing can fail with an exception. Let's just remove columns with empty names from the structure. + names_and_types.erase( + std::remove_if(names_and_types.begin(), names_and_types.end(), [](const NameAndTypePair & pair) { return pair.name.empty(); }), + names_and_types.end()); + + auto columns = ColumnsDescription(names_and_types); + if (mode == SchemaInferenceMode::DEFAULT) + read_buffer_iterator.setResultingSchema(columns); + return {columns, *format_name}; } - else - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "{} file format doesn't support schema inference. You must specify the structure manually", - format_name); - /// Some formats like CSVWithNames can contain empty column names. We don't support empty column names and further processing can fail with an exception. Let's just remove columns with empty names from the structure. 
- names_and_types.erase( - std::remove_if(names_and_types.begin(), names_and_types.end(), [](const NameAndTypePair & pair) { return pair.name.empty(); }), - names_and_types.end()); - - auto columns = ColumnsDescription(names_and_types); - if (mode == SchemaInferenceMode::DEFAULT) - read_buffer_iterator.setResultingSchema(columns); - return columns; + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "{} file format doesn't support schema inference. You must specify the structure manually", + *format_name); } catch (Exception & e) { @@ -319,16 +525,21 @@ catch (Exception & e) throw; } - ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - bool retry, - ContextPtr & context) + const ContextPtr & context) { - std::unique_ptr buf_out; - return readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, retry, context, buf_out); + return readSchemaFromFormatImpl(format_name, format_settings, read_buffer_iterator, context).first; +} + +std::pair detectFormatAndReadSchema( + const std::optional & format_settings, + IReadBufferIterator & read_buffer_iterator, + const ContextPtr & context) +{ + return readSchemaFromFormatImpl(std::nullopt, format_settings, read_buffer_iterator, context); } SchemaCache::Key getKeyForSchemaCache( diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index 6aa8f3f9c4c..bb5e068f696 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -7,29 +7,68 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + struct IReadBufferIterator { virtual ~IReadBufferIterator() = default; - virtual void setPreviousReadBuffer(std::unique_ptr /* buffer */) {} - /// Return read buffer of the next file or cached schema. /// In DEFAULT schema inference mode cached schema can be from any file. /// In UNION mode cached schema can be only from current file. /// When there is no files to process, return pair (nullptr, nullopt) - virtual std::pair, std::optional> next() = 0; + struct Data + { + /// Read buffer of the next file. Can be nullptr if there are no more files + /// or when schema was found in cache. + std::unique_ptr buf; + + /// Schema from cache. + /// In DEFAULT schema inference mode cached schema can be from any file. + /// In UNION mode cached schema can be only from current file. + std::optional cached_columns; + + /// Format of the file if known. + std::optional format_name; + }; + + virtual Data next() = 0; + + /// Set read buffer returned in previous iteration. + virtual void setPreviousReadBuffer(std::unique_ptr /* buffer */) {} + + /// Set number of rows to last file extracted during schema inference. + /// Used for caching number of rows from files metadata during schema inference. virtual void setNumRowsToLastFile(size_t /*num_rows*/) {} /// Set schema inferred from last file. Used for UNION mode to cache schema /// per file. virtual void setSchemaToLastFile(const ColumnsDescription & /*columns*/) {} + /// Set resulting inferred schema. Used for DEFAULT mode to cache schema /// for all files. virtual void setResultingSchema(const ColumnsDescription & /*columns*/) {} + /// Set auto detected format name. + virtual void setFormatName(const String & /*format_name*/) {} + /// Get last processed file name for better exception messages. virtual String getLastFileName() const { return ""; } + + /// Return true if method recreateLastReadBuffer is implemented. 
+ virtual bool supportsLastReadBufferRecreation() const { return false; } + + /// Recreate last read buffer to read data from the same file again. + /// Used to detect format from the file content to avoid + /// copying data. + virtual std::unique_ptr recreateLastReadBuffer() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method recreateLastReadBuffer is not implemented"); + } }; struct SingleReadBufferIterator : public IReadBufferIterator @@ -39,12 +78,22 @@ public: { } - std::pair, std::optional> next() override + Data next() override { if (done) - return {nullptr, {}}; + return {nullptr, {}, std::nullopt}; done = true; - return {std::move(buf), {}}; + return Data{std::move(buf), {}, std::nullopt}; + } + + void setPreviousReadBuffer(std::unique_ptr buf_) override + { + buf = std::move(buf_); + } + + std::unique_ptr releaseBuffer() + { + return std::move(buf); } private: @@ -73,17 +122,16 @@ ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - bool retry, - ContextPtr & context); + const ContextPtr & context); -/// If ReadBuffer is created, it will be written to buf_out. -ColumnsDescription readSchemaFromFormat( - const String & format_name, +/// Try to detect the format of the data and it's schema. +/// It runs schema inference for some set of formats on the same file. +/// If schema reader of some format successfully inferred the schema from +/// some file, we consider that the data is in this format. +std::pair detectFormatAndReadSchema( const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - bool retry, - ContextPtr & context, - std::unique_ptr & buf_out); + const ContextPtr & context); SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional & format_settings, const ContextPtr & context); SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional & format_settings, const ContextPtr & context); diff --git a/src/IO/Archives/IArchiveReader.h b/src/IO/Archives/IArchiveReader.h index 84a1dc21f5b..ee516d2655b 100644 --- a/src/IO/Archives/IArchiveReader.h +++ b/src/IO/Archives/IArchiveReader.h @@ -56,6 +56,7 @@ public: /// It's possible to convert a file enumerator to a read buffer and vice versa. 
virtual std::unique_ptr readFile(std::unique_ptr enumerator) = 0; virtual std::unique_ptr nextFile(std::unique_ptr read_buffer) = 0; + virtual std::unique_ptr currentFile(std::unique_ptr read_buffer) = 0; virtual std::vector getAllFiles() = 0; virtual std::vector getAllFiles(NameFilter filter) = 0; diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index 763cd3b171b..eb190f2e0fc 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -340,6 +340,15 @@ std::unique_ptr LibArchiveReader::nextFile(std return std::make_unique(std::move(handle)); } +std::unique_ptr LibArchiveReader::currentFile(std::unique_ptr read_buffer) +{ + if (!dynamic_cast(read_buffer.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); + auto read_buffer_from_libarchive = std::unique_ptr(static_cast(read_buffer.release())); + auto handle = std::move(*read_buffer_from_libarchive).releaseHandle(); + return std::make_unique(std::move(handle)); +} + std::vector LibArchiveReader::getAllFiles() { return getAllFiles({}); diff --git a/src/IO/Archives/LibArchiveReader.h b/src/IO/Archives/LibArchiveReader.h index 3dadd710089..c4b08d8ddf7 100644 --- a/src/IO/Archives/LibArchiveReader.h +++ b/src/IO/Archives/LibArchiveReader.h @@ -40,6 +40,7 @@ public: /// It's possible to convert a file enumerator to a read buffer and vice versa. std::unique_ptr readFile(std::unique_ptr enumerator) override; std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + std::unique_ptr currentFile(std::unique_ptr read_buffer) override; std::vector getAllFiles() override; std::vector getAllFiles(NameFilter filter) override; diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 636042ec586..63fdf5fe190 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -589,6 +589,15 @@ std::unique_ptr ZipArchiveReader::nextFile(std return std::make_unique(std::move(handle)); } +std::unique_ptr ZipArchiveReader::currentFile(std::unique_ptr read_buffer) +{ + if (!dynamic_cast(read_buffer.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); + auto read_buffer_from_zip = std::unique_ptr(static_cast(read_buffer.release())); + auto handle = std::move(*read_buffer_from_zip).releaseHandle(); + return std::make_unique(std::move(handle)); +} + std::vector ZipArchiveReader::getAllFiles() { return getAllFiles({}); diff --git a/src/IO/Archives/ZipArchiveReader.h b/src/IO/Archives/ZipArchiveReader.h index a8788064fec..4b1910839eb 100644 --- a/src/IO/Archives/ZipArchiveReader.h +++ b/src/IO/Archives/ZipArchiveReader.h @@ -47,6 +47,7 @@ public: /// It's possible to convert a file enumerator to a read buffer and vice versa. 
std::unique_ptr readFile(std::unique_ptr enumerator) override; std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + std::unique_ptr currentFile(std::unique_ptr read_buffer) override; std::vector getAllFiles() override; std::vector getAllFiles(NameFilter filter) override; diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 26c632b83dc..c5c6ba84d9a 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -81,7 +81,7 @@ IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & fo { } -void IIRowSchemaReader::setContext(ContextPtr & context) +void IIRowSchemaReader::setContext(const ContextPtr & context) { ColumnsDescription columns; if (tryParseColumnsListFromString(hints_str, columns, context, hints_parsing_error)) diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 94df71a88b4..23c6606a6bd 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -34,7 +34,7 @@ public: virtual bool hasStrictOrderOfColumns() const { return true; } virtual bool needContext() const { return false; } - virtual void setContext(ContextPtr &) {} + virtual void setContext(const ContextPtr &) {} virtual void setMaxRowsAndBytesToRead(size_t, size_t) {} virtual size_t getNumRowsRead() const { return 0; } @@ -56,7 +56,7 @@ public: IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_ = nullptr); bool needContext() const override { return !hints_str.empty(); } - void setContext(ContextPtr & context) override; + void setContext(const ContextPtr & context) override; protected: void setMaxRowsAndBytesToRead(size_t max_rows, size_t max_bytes) override diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index 53cb5a77898..62d33d36206 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -215,7 +215,7 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( { } -void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx) +void JSONColumnsSchemaReaderBase::setContext(const ContextPtr & ctx) { ColumnsDescription columns; if (tryParseColumnsListFromString(hints_str, columns, ctx, hints_parsing_error)) diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index fe80d77cd87..ee7e79afc54 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -84,7 +84,7 @@ public: void transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; bool needContext() const override { return !hints_str.empty(); } - void setContext(ContextPtr & ctx) override; + void setContext(const ContextPtr & ctx) override; void setMaxRowsAndBytesToRead(size_t max_rows, size_t max_bytes) override { diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp index f78ce530ecb..7283eb1330f 100644 --- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp @@ -70,27 +70,36 @@ void JSONRowInputFormat::resetReadBuffer() JSONEachRowRowInputFormat::resetReadBuffer(); } 
-JSONRowSchemaReader::JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : JSONRowSchemaReader(std::make_unique(in_), format_settings_) +JSONRowSchemaReader::JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, bool fallback_to_json_each_row_) + : JSONRowSchemaReader(std::make_unique(in_), format_settings_, fallback_to_json_each_row_) { } -JSONRowSchemaReader::JSONRowSchemaReader(std::unique_ptr buf, const DB::FormatSettings & format_settings_) - : JSONEachRowSchemaReader(*buf, format_settings_), peekable_buf(std::move(buf)) +JSONRowSchemaReader::JSONRowSchemaReader(std::unique_ptr buf, const DB::FormatSettings & format_settings_, bool fallback_to_json_each_row_) + : JSONEachRowSchemaReader(*buf, format_settings_), peekable_buf(std::move(buf)), fallback_to_json_each_row(fallback_to_json_each_row_) { } NamesAndTypesList JSONRowSchemaReader::readSchema() { skipBOMIfExists(*peekable_buf); - PeekableReadBufferCheckpoint checkpoint(*peekable_buf); - /// Try to parse metadata, if failed, try to parse data as JSONEachRow format - NamesAndTypesList names_and_types; - if (JSONUtils::checkAndSkipObjectStart(*peekable_buf) && JSONUtils::tryReadMetadata(*peekable_buf, names_and_types)) - return names_and_types; - peekable_buf->rollbackToCheckpoint(true); - return JSONEachRowSchemaReader::readSchema(); + if (fallback_to_json_each_row) + { + PeekableReadBufferCheckpoint checkpoint(*peekable_buf); + /// Try to parse metadata, if failed, try to parse data as JSONEachRow format + NamesAndTypesList names_and_types; + if (JSONUtils::checkAndSkipObjectStart(*peekable_buf) && JSONUtils::tryReadMetadata(*peekable_buf, names_and_types)) + return names_and_types; + + peekable_buf->rollbackToCheckpoint(true); + return JSONEachRowSchemaReader::readSchema(); + } + else + { + JSONUtils::skipObjectStart(*peekable_buf); + return JSONUtils::readMetadata(*peekable_buf); + } } void registerInputFormatJSON(FormatFactory & factory) @@ -109,19 +118,19 @@ void registerInputFormatJSON(FormatFactory & factory) void registerJSONSchemaReader(FormatFactory & factory) { - auto register_schema_reader = [&](const String & format) + auto register_schema_reader = [&](const String & format, bool fallback_to_json_each_row) { factory.registerSchemaReader( - format, [](ReadBuffer & buf, const FormatSettings & format_settings) { return std::make_unique(buf, format_settings); }); + format, [fallback_to_json_each_row](ReadBuffer & buf, const FormatSettings & format_settings) { return std::make_unique(buf, format_settings, fallback_to_json_each_row); }); factory.registerAdditionalInfoForSchemaCacheGetter(format, [](const FormatSettings & settings) { return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON); }); }; - register_schema_reader("JSON"); + register_schema_reader("JSON", true); /// JSONCompact has the same suffix with metadata. 
- register_schema_reader("JSONCompact"); + register_schema_reader("JSONCompact", false); } } diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.h b/src/Processors/Formats/Impl/JSONRowInputFormat.h index b2e1d8a3d6d..6db5cee380a 100644 --- a/src/Processors/Formats/Impl/JSONRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowInputFormat.h @@ -45,16 +45,17 @@ private: class JSONRowSchemaReader : public JSONEachRowSchemaReader { public: - JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); + JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, bool fallback_to_json_each_row_); NamesAndTypesList readSchema() override; bool hasStrictOrderOfColumns() const override { return false; } private: - JSONRowSchemaReader(std::unique_ptr buf, const FormatSettings & format_settings_); + JSONRowSchemaReader(std::unique_ptr buf, const FormatSettings & format_settings_, bool fallback_to_json_each_row_); std::unique_ptr peekable_buf; + bool fallback_to_json_each_row; }; } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index a6e4600d83b..f5edfb7c9d4 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -609,7 +609,9 @@ void registerTemplateSchemaReader(FormatFactory & factory) { size_t index = 0; auto idx_getter = [&](const String &) -> std::optional { return index++; }; - auto row_format = fillRowFormat(settings, idx_getter, false); + ParsedTemplateFormatString row_format; + if (!settings.template_settings.row_format.empty()) + row_format = fillRowFormat(settings, idx_getter, false); std::unordered_set visited_escaping_rules; String result = fmt::format("row_format={}, resultset_format={}, row_between_delimiter={}", settings.template_settings.row_format, diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index bf2765bfd1e..f82a8c8ab64 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -37,7 +37,7 @@ public: void resetReadBuffer() override; /// TODO: remove context somehow. 
- void setContext(ContextPtr & context_) { context = Context::createCopy(context_); } + void setContext(const ContextPtr & context_) { context = Context::createCopy(context_); } const BlockMissingValues & getMissingValues() const override { return block_missing_values; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fa7206eeaac..8120667916e 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index 77a22cd00fc..72b182ad1f4 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -38,25 +38,25 @@ public: static ColumnsDescription getTableStructureFromData( Configuration & base_configuration, const std::optional & format_settings, - ContextPtr local_context) + const ContextPtr & local_context) { auto configuration = getConfigurationForDataRead(base_configuration, local_context); return Storage::getTableStructureFromData(configuration, format_settings, local_context); } - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) + static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context) { return Storage::getConfiguration(engine_args, local_context, /* get_format_from_file */false); } - Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); return Storage::getConfiguration(); } - void updateConfiguration(ContextPtr local_context) override + void updateConfiguration(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); @@ -64,7 +64,7 @@ public: private: static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}) + const Configuration & base_configuration, const ContextPtr & local_context, const Strings & keys = {}) { auto configuration{base_configuration}; configuration.update(local_context); @@ -84,12 +84,12 @@ private: return configuration; } - static Strings getDataFiles(const Configuration & configuration, ContextPtr local_context) + static Strings getDataFiles(const Configuration & configuration, const ContextPtr & local_context) { return MetadataParser().getFiles(configuration, local_context); } - void updateConfigurationImpl(ContextPtr local_context) + void updateConfigurationImpl(const ContextPtr & local_context) { const bool updated = base_configuration.update(local_context); auto new_keys = getDataFiles(base_configuration, local_context); diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp index 20ac77976cb..faef21d6c72 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -40,7 +40,7 @@ StorageIceberg::StorageIceberg( ColumnsDescription StorageIceberg::getTableStructureFromData( Configuration & base_configuration, const std::optional &, - ContextPtr local_context) + const ContextPtr & local_context) { auto configuration{base_configuration}; configuration.update(local_context); @@ -48,7 +48,7 @@ ColumnsDescription StorageIceberg::getTableStructureFromData( 
return ColumnsDescription(metadata->getTableSchema()); } -void StorageIceberg::updateConfigurationImpl(ContextPtr local_context) +void StorageIceberg::updateConfigurationImpl(const ContextPtr & local_context) { const bool updated = base_configuration.update(local_context); auto new_metadata = parseIcebergMetadata(base_configuration, local_context); diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index a18865b5a54..0b346ef0175 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -51,28 +51,28 @@ public: static ColumnsDescription getTableStructureFromData( Configuration & base_configuration, const std::optional &, - ContextPtr local_context); + const ContextPtr & local_context); static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) { return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false); } - Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); return StorageS3::getConfiguration(); } - void updateConfiguration(ContextPtr local_context) override + void updateConfiguration(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); } private: - void updateConfigurationImpl(ContextPtr local_context); + void updateConfigurationImpl(const ContextPtr & local_context); std::unique_ptr current_metadata; Configuration base_configuration; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 1e26f1be72c..a846e9fd9ef 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -67,6 +67,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_COMPILE_REGEXP; + extern const int CANNOT_DETECT_FORMAT; } namespace { @@ -194,7 +195,7 @@ StorageHDFS::StorageHDFS( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_, const bool distributed_processing_, ASTPtr partition_by_) @@ -206,7 +207,8 @@ StorageHDFS::StorageHDFS( , distributed_processing(distributed_processing_) , partition_by(partition_by_) { - FormatFactory::instance().checkFormatName(format_name); + if (format_name != "auto") + FormatFactory::instance().checkFormatName(format_name); context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); checkHDFSURL(uri_); @@ -217,11 +219,19 @@ StorageHDFS::StorageHDFS( if (columns_.empty()) { - auto columns = getTableStructureFromData(format_name, uri_, compression_method, context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); + else + columns = getTableStructureFromData(format_name, uri_, compression_method, context_); + storage_metadata.setColumns(columns); } else { + if (format_name == "auto") + format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; + /// We don't allow special columns in HDFS storage. 
if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -243,25 +253,25 @@ namespace ReadBufferIterator( const std::vector & paths_with_info_, const String & uri_without_path_, - const String & format_, + std::optional format_, const String & compression_method_, const ContextPtr & context_) : WithContext(context_) , paths_with_info(paths_with_info_) , uri_without_path(uri_without_path_) - , format(format_) + , format(std::move(format_)) , compression_method(compression_method_) { } - std::pair, std::optional> next() override + Data next() override { bool is_first = current_index == 0; /// For default mode check cached columns for all paths on first iteration. if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) { if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns}; + return {nullptr, cached_columns, format}; } StorageHDFS::PathWithInfo path_with_info; @@ -271,10 +281,17 @@ namespace if (current_index == paths_with_info.size()) { if (is_first) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. " - "You must specify table structure manually", format); - return {nullptr, std::nullopt}; + { + if (format) + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. " + "You can specify table structure manually", *format); + + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify table structure manually"); + } + return {nullptr, std::nullopt, format}; } path_with_info = paths_with_info[current_index++]; @@ -285,7 +302,7 @@ namespace { std::vector paths = {path_with_info}; if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns}; + return {nullptr, cached_columns, format}; } auto compression = chooseCompressionMethod(path_with_info.path, compression_method); @@ -293,7 +310,7 @@ namespace if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) { const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt}; + return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; } } } @@ -304,7 +321,7 @@ namespace return; String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); + auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -315,7 +332,7 @@ namespace return; String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); + auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); } @@ -328,10 +345,15 @@ namespace Strings sources; sources.reserve(paths_with_info.size()); std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, format, {}, getContext()); + auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { if (current_index != 0) @@ -340,13 +362,27 @@ namespace return ""; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_index > 0 && current_index <= paths_with_info.size()); + auto path_with_info = paths_with_info[current_index - 1]; + auto compression = chooseCompressionMethod(path_with_info.path, compression_method); + auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); + const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; + return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); + } + private: std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) + auto context = getContext(); + + if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) return std::nullopt; - auto & schema_cache = StorageHDFS::getSchemaCache(getContext()); + auto & schema_cache = StorageHDFS::getSchemaCache(context); for (const auto & path_with_info : paths_with_info_) { auto get_last_mod_time = [&]() -> std::optional @@ -354,7 
+390,7 @@ namespace if (path_with_info.info) return path_with_info.info->last_mod_time; - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); + auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); auto fs = createHDFSFS(builder.get()); HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); if (hdfs_info) @@ -364,10 +400,28 @@ namespace }; String url = uri_without_path + path_with_info.path; - auto cache_key = getKeyForSchemaCache(url, format, {}, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(url, *format, {}, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -375,29 +429,49 @@ namespace const std::vector & paths_with_info; const String & uri_without_path; - const String & format; + std::optional format; const String & compression_method; size_t current_index = 0; }; } -ColumnsDescription StorageHDFS::getTableStructureFromData( - const String & format, +std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( + std::optional format, const String & uri, const String & compression_method, - ContextPtr ctx) + const ContextPtr & ctx) { const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - if (paths_with_info.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) + if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." + " You can specify table structure manually", *format); + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path." - " You must specify table structure manually", format); + "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
+ " You can specify the format manually"); + } ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx); + if (format) + return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; + return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); +} + +std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); +} + +ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; } class HDFSSource::DisclosedGlobIterator::Impl @@ -533,7 +607,7 @@ StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() HDFSSource::HDFSSource( const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_) @@ -712,7 +786,7 @@ public: HDFSSink(const String & uri, const String & format, const Block & sample_block, - ContextPtr context, + const ContextPtr & context, const CompressionMethod compression_method) : SinkToStorage(sample_block) { @@ -1073,7 +1147,7 @@ void registerStorageHDFS(StorageFactory & factory) } if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); + format_name = FormatFactory::instance().getFormatFromFileName(url); String compression_method; if (engine_args.size() == 3) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index f1f0019d3e0..1edbf2b77ce 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -44,7 +44,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_ = "", bool distributed_processing_ = false, ASTPtr partition_by = nullptr); @@ -86,7 +86,12 @@ public: const String & format, const String & uri, const String & compression_method, - ContextPtr ctx); + const ContextPtr & ctx); + + static std::pair getTableStructureAndFormatFromData( + const String & uri, + const String & compression_method, + const ContextPtr & ctx); static SchemaCache & getSchemaCache(const ContextPtr & ctx); @@ -97,6 +102,12 @@ protected: friend class ReadFromHDFS; private: + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, + const String & uri, + const String & compression_method, + const ContextPtr & ctx); + std::vector uris; String format_name; String compression_method; @@ -141,7 +152,7 @@ public: HDFSSource( const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_); diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 2e8129b9845..a1e03926520 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -43,12 +43,10 @@ StorageHDFSCluster::StorageHDFSCluster( const String & format_name_, const 
ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const String & compression_method) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageHDFSCluster (" + table_id_.table_name + ")")) , uri(uri_) , format_name(format_name_) - , compression_method(compression_method_) { checkHDFSURL(uri_); context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); @@ -57,11 +55,20 @@ StorageHDFSCluster::StorageHDFSCluster( if (columns_.empty()) { - auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_); + else + columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); storage_metadata.setColumns(columns); } else + { + if (format_name == "auto") + format_name = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_).second; + storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); @@ -69,13 +76,14 @@ StorageHDFSCluster::StorageHDFSCluster( virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } -void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query)); - TableFunctionHDFSCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionHDFSCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); } diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 7c4c41a573a..40884f98984 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -28,8 +28,7 @@ public: const String & format_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_); + const String & compression_method); std::string getName() const override { return "HDFSCluster"; } @@ -42,11 +41,10 @@ public: bool supportsTrivialCountOptimization() const override { return true; } private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; String uri; String format_name; - String compression_method; NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 
6f42d8f855c..348e37fc72c 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -32,12 +32,10 @@ namespace DB IStorageCluster::IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - Poco::Logger * log_, - bool structure_argument_was_provided_) + Poco::Logger * log_) : IStorage(table_id_) , log(log_) , cluster_name(cluster_name_) - , structure_argument_was_provided(structure_argument_was_provided_) { } @@ -130,8 +128,7 @@ void IStorageCluster::read( query_to_send = interpreter.getQueryInfo().query->clone(); } - if (!structure_argument_was_provided) - addColumnsStructureToQuery(query_to_send, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), context); + updateQueryToSendIfNeeded(query_to_send, storage_snapshot, context); RestoreQualifiedNamesVisitor::Data data; data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query_info.query->as(), 0)); diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index b233f20103d..28ebda5125e 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -19,8 +19,7 @@ public: IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - Poco::Logger * log_, - bool structure_argument_was_provided_); + Poco::Logger * log_); void read( QueryPlan & query_plan, @@ -42,13 +41,11 @@ public: protected: virtual void updateBeforeRead(const ContextPtr &) {} - - virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0; + virtual void updateQueryToSendIfNeeded(ASTPtr & /*query*/, const StorageSnapshotPtr & /*storage_snapshot*/, const ContextPtr & /*context*/) {} private: Poco::Logger * log; String cluster_name; - bool structure_argument_was_provided; }; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index bc33e8cf2a9..098d279e482 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -143,11 +143,17 @@ StorageS3Queue::StorageS3Queue( StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_); + ColumnsDescription columns; + if (configuration.format == "auto") + std::tie(columns, configuration.format) = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_); + else + columns = StorageS3::getTableStructureFromData(configuration, format_settings, context_); storage_metadata.setColumns(columns); } else { + if (configuration.format == "auto") + configuration.format = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_).second; storage_metadata.setColumns(columns_); } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index cd841a1a673..888d360aff1 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -65,6 +65,7 @@ namespace ErrorCodes extern const int DATABASE_ACCESS_DENIED; extern const int CANNOT_COMPILE_REGEXP; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; @@ -127,7 +128,7 @@ void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configurat } -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, ContextPtr local_context) +StorageAzureBlob::Configuration 
StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) { StorageAzureBlob::Configuration configuration; @@ -143,7 +144,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path); return configuration; } @@ -236,13 +237,13 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path); return configuration; } -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(ContextPtr local_context) +AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) { const auto & context_settings = local_context->getSettingsRef(); auto settings_ptr = std::make_unique(); @@ -447,7 +448,7 @@ Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const StorageAzureBlob::StorageAzureBlob( const Configuration & configuration_, std::unique_ptr && object_storage_, - ContextPtr context, + const ContextPtr & context, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -463,17 +464,25 @@ StorageAzureBlob::StorageAzureBlob( , format_settings(format_settings_) , partition_by(partition_by_) { - FormatFactory::instance().checkFormatName(configuration.format); + if (configuration.format != "auto") + FormatFactory::instance().checkFormatName(configuration.format); context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing); + ColumnsDescription columns; + if (configuration.format == "auto") + std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context); + else + columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); storage_metadata.setColumns(columns); } else { + if (configuration.format == "auto") + configuration.format = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context).second; + /// We don't allow special columns in File storage. 
if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -517,7 +526,7 @@ public: StorageAzureBlobSink( const String & format, const Block & sample_block_, - ContextPtr context, + const ContextPtr & context, std::optional format_settings_, const CompressionMethod compression_method, AzureObjectStorage * object_storage, @@ -607,22 +616,21 @@ private: std::mutex cancel_mutex; }; -class PartitionedStorageAzureBlobSink : public PartitionedSink +class PartitionedStorageAzureBlobSink : public PartitionedSink, WithContext { public: PartitionedStorageAzureBlobSink( const ASTPtr & partition_by, const String & format_, const Block & sample_block_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, const CompressionMethod compression_method_, AzureObjectStorage * object_storage_, const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_) + : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) , format(format_) , sample_block(sample_block_) - , context(context_) , compression_method(compression_method_) , object_storage(object_storage_) , blob(blob_) @@ -638,7 +646,7 @@ public: return std::make_shared( format, sample_block, - context, + getContext(), format_settings, compression_method, object_storage, @@ -649,7 +657,6 @@ public: private: const String format; const Block sample_block; - const ContextPtr context; const CompressionMethod compression_method; AzureObjectStorage * object_storage; const String blob; @@ -913,7 +920,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( String blob_path_with_globs_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs_, std::function file_progress_callback_) : IIterator(context_) @@ -1028,7 +1035,7 @@ StorageAzureBlobSource::KeysIterator::KeysIterator( const Strings & keys_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs, std::function file_progress_callback) : IIterator(context_) @@ -1147,7 +1154,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( const ReadFromFormatInfo & info, const String & format_, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, String compression_hint_, @@ -1290,6 +1297,7 @@ namespace ReadBufferIterator( const std::shared_ptr & file_iterator_, AzureObjectStorage * object_storage_, + std::optional format_, const StorageAzureBlob::Configuration & configuration_, const std::optional & format_settings_, const RelativePathsWithMetadata & read_keys_, @@ -1298,19 +1306,20 @@ namespace , file_iterator(file_iterator_) , object_storage(object_storage_) , configuration(configuration_) + , format(std::move(format_)) , format_settings(format_settings_) , read_keys(read_keys_) , prev_read_keys_size(read_keys_.size()) { } - std::pair, std::optional> next() override + Data next() override { /// For default mode check cached columns for currently read keys on first iteration. 
if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) { if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; + return {nullptr, cached_columns, format}; } current_path_with_metadata = file_iterator->next(); @@ -1318,12 +1327,20 @@ namespace if (current_path_with_metadata.relative_path.empty()) { if (first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "in AzureBlobStorage. You can specify table structure manually", *format); + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in AzureBlobStorage. You must specify table structure manually", configuration.format); + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "in AzureBlobStorage. You can specify table structure manually"); + } - return {nullptr, std::nullopt}; + return {nullptr, std::nullopt, format}; } first = false; @@ -1334,13 +1351,13 @@ namespace auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); prev_read_keys_size = read_keys.size(); if (columns_from_cache) - return {nullptr, columns_from_cache}; + return {nullptr, columns_from_cache, format}; } else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { RelativePathsWithMetadata paths = {current_path_with_metadata}; if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; + return {nullptr, columns_from_cache, format}; } first = false; @@ -1348,7 +1365,7 @@ namespace return {wrapReadBufferWithCompressionMethod( object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt}; + zstd_window_log_max), std::nullopt, format}; } void setNumRowsToLastFile(size_t num_rows) override @@ -1357,7 +1374,7 @@ namespace return; String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1368,7 +1385,7 @@ namespace return; String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1382,16 +1399,36 @@ namespace Strings sources; sources.reserve(read_keys.size()); std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); + 
auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { return current_path_with_metadata.relative_path; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod( + object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), + chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), + zstd_window_log_max); + } + private: std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) { - auto & schema_cache = StorageAzureBlob::getSchemaCache(getContext()); + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_azure) + return std::nullopt; + + auto & schema_cache = StorageAzureBlob::getSchemaCache(context); for (auto it = begin; it < end; ++it) { auto get_last_mod_time = [&] -> std::optional @@ -1403,10 +1440,28 @@ namespace auto host_and_bucket = configuration.connection_url + '/' + configuration.container; String source = host_and_bucket + '/' + it->relative_path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
+ format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -1415,6 +1470,7 @@ namespace std::shared_ptr file_iterator; AzureObjectStorage * object_storage; const StorageAzureBlob::Configuration & configuration; + std::optional format; const std::optional & format_settings; const RelativePathsWithMetadata & read_keys; size_t prev_read_keys_size; @@ -1423,21 +1479,16 @@ namespace }; } -ColumnsDescription StorageAzureBlob::getTableStructureFromData( +std::pair StorageAzureBlob::getTableStructureAndFormatFromDataImpl( + std::optional format, AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing) + const ContextPtr & ctx) { RelativePathsWithMetadata read_keys; std::shared_ptr file_iterator; - if (distributed_processing) - { - file_iterator = std::make_shared(ctx, - ctx->getReadTaskCallback()); - } - else if (configuration.withGlobs()) + if (configuration.withGlobs()) { file_iterator = std::make_shared( object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); @@ -1448,8 +1499,28 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData( object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); } - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, configuration, format_settings, read_keys, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); + ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); +} + +std::pair StorageAzureBlob::getTableStructureAndFormatFromData( + DB::AzureObjectStorage * object_storage, + const DB::StorageAzureBlob::Configuration & configuration, + const std::optional & format_settings, + const DB::ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx); +} + +ColumnsDescription StorageAzureBlob::getTableStructureFromData( + DB::AzureObjectStorage * object_storage, + const DB::StorageAzureBlob::Configuration & configuration, + const std::optional & format_settings, + const DB::ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx).first; } SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 16e5b9edfb6..71c93021dd4 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -31,9 +31,9 @@ public: String getPath() const { return blob_path; } - bool update(ContextPtr context); + bool update(const ContextPtr & context); - void connect(ContextPtr context); + void connect(const ContextPtr & context); bool withGlobs() const { return blob_path.find_first_of("*?{") != std::string::npos; } @@ -59,7 +59,7 @@ public: StorageAzureBlob( const Configuration & configuration_, std::unique_ptr && object_storage_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -68,10 +68,10 @@ public: bool 
distributed_processing_, ASTPtr partition_by_); - static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); - static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); + static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); @@ -115,10 +115,22 @@ public: AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing = false); + const ContextPtr & ctx); + + static std::pair getTableStructureAndFormatFromData( + AzureObjectStorage * object_storage, + const Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx); private: + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, + AzureObjectStorage * object_storage, + const Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx); + friend class ReadFromAzureBlob; std::string name; @@ -137,7 +149,7 @@ public: class IIterator : public WithContext { public: - IIterator(ContextPtr context_):WithContext(context_) {} + IIterator(const ContextPtr & context_):WithContext(context_) {} virtual ~IIterator() = default; virtual RelativePathWithMetadata next() = 0; @@ -153,7 +165,7 @@ public: String blob_path_with_globs_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs_, std::function file_progress_callback_ = {}); @@ -186,7 +198,7 @@ public: class ReadIterator : public IIterator { public: - explicit ReadIterator(ContextPtr context_, + explicit ReadIterator(const ContextPtr & context_, const ReadTaskCallback & callback_) : IIterator(context_), callback(callback_) { } RelativePathWithMetadata next() override @@ -207,7 +219,7 @@ public: const Strings & keys_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs, std::function file_progress_callback = {}); @@ -229,7 +241,7 @@ public: const ReadFromFormatInfo & info, const String & format_, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, String compression_hint_, diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index a6372577fb0..0f607a9812f 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -36,23 +36,30 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const ContextPtr & context) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")")) , 
configuration{configuration_} , object_storage(std::move(object_storage_)) { - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { + ColumnsDescription columns; /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function - auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false); + if (configuration.format == "auto") + std::tie(columns, configuration.format) = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); + else + columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); storage_metadata.setColumns(columns); } else + { + if (configuration.format == "auto") + configuration.format = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context).second; storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); @@ -60,13 +67,14 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } -void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionAzureBlobStorageCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), configuration.format, context); } RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 2831b94f825..476f21c6742 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -27,8 +27,7 @@ public: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); + const ContextPtr & context); std::string getName() const override { return "AzureBlobStorageCluster"; } @@ -43,7 +42,7 @@ public: private: void updateBeforeRead(const ContextPtr & /*context*/) override {} - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; StorageAzureBlob::Configuration configuration; NamesAndTypesList 
virtual_columns; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9f864813de9..920c7069529 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -89,6 +89,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; extern const int CANNOT_COMPILE_REGEXP; } @@ -327,7 +328,7 @@ std::unique_ptr createReadBuffer( } -Strings StorageFile::getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read) +Strings StorageFile::getPathsList(const String & table_path, const String & user_files_path, const ContextPtr & context, size_t & total_bytes_to_read) { fs::path user_files_absolute_path = fs::weakly_canonical(user_files_path); fs::path fs_table_path(table_path); @@ -374,27 +375,44 @@ namespace public: ReadBufferFromFileIterator( const std::vector & paths_, - const String & format_, + std::optional format_, const String & compression_method_, const std::optional & format_settings_, - ContextPtr context_) + const ContextPtr & context_) : WithContext(context_) , paths(paths_) - , format(format_) + , format(std::move(format_)) , compression_method(compression_method_) , format_settings(format_settings_) { } - std::pair, std::optional> next() override + Data next() override { bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - /// If we have cached columns, next() won't be called again. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (is_first) { - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns}; + /// If format is unknown we iterate through all paths on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & path : paths) + { + if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path)) + { + format = format_from_path; + break; + } + } + } + + /// For default mode check cached columns for all paths on first iteration. + /// If we have cached columns, next() won't be called again. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(paths)) + return {nullptr, cached_columns, format}; + } } String path; @@ -405,11 +423,18 @@ namespace if (current_index == paths.size()) { if (is_first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. You can specify table structure manually", + *format); + throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); - return {nullptr, std::nullopt}; + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify the format manually"); + } + return {nullptr, std::nullopt, std::nullopt}; } path = paths[current_index++]; @@ -420,10 +445,10 @@ namespace if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { if (auto cached_columns = tryGetColumnsFromCache({path})) - return {nullptr, cached_columns}; + return {nullptr, cached_columns, format}; } - return {createReadBuffer(path, file_stat, false, -1, compression_method, getContext()), std::nullopt}; + return {createReadBuffer(path, file_stat, false, -1, compression_method, getContext()), std::nullopt, format}; } void setNumRowsToLastFile(size_t num_rows) override @@ -431,7 +456,7 @@ namespace if (!getContext()->getSettingsRef().use_cache_for_count_from_files) return; - auto key = getKeyForSchemaCache(paths[current_index - 1], format, format_settings, getContext()); + auto key = getKeyForSchemaCache(paths[current_index - 1], *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -443,7 +468,7 @@ namespace /// For union mode, schema can be different for different files, so we need to /// cache last inferred schema only for last processed file. - auto cache_key = getKeyForSchemaCache(paths[current_index - 1], format, format_settings, getContext()); + auto cache_key = getKeyForSchemaCache(paths[current_index - 1], *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addColumns(cache_key, columns); } @@ -454,7 +479,7 @@ namespace return; /// For default mode we cache resulting schema for all paths. - auto cache_keys = getKeysForSchemaCache(paths, format, format_settings, getContext()); + auto cache_keys = getKeysForSchemaCache(paths, *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } @@ -465,14 +490,30 @@ namespace return ""; } + void setFormatName(const String & format_name) override + { + format = format_name; + } + + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_index > 0 && current_index <= paths.size()); + auto path = paths[current_index - 1]; + auto file_stat = getFileStat(path, false, -1, "File"); + return createReadBuffer(path, file_stat, false, -1, compression_method, getContext()); + } + private: std::optional tryGetColumnsFromCache(const Strings & paths_) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file) + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_file) return std::nullopt; /// Check if the cache contains one of the paths. - auto & schema_cache = StorageFile::getSchemaCache(getContext()); + auto & schema_cache = StorageFile::getSchemaCache(context); struct stat file_stat{}; for (const auto & path : paths_) { @@ -484,10 +525,28 @@ namespace return file_stat.st_mtime; }; - auto cache_key = getKeyForSchemaCache(path, format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(path, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. 
+ /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(path, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -496,7 +555,7 @@ namespace const std::vector & paths; size_t current_index = 0; - String format; + std::optional format; String compression_method; const std::optional & format_settings; }; @@ -506,17 +565,17 @@ namespace public: ReadBufferFromArchiveIterator( const StorageFile::ArchiveInfo & archive_info_, - const String & format_, + std::optional format_, const std::optional & format_settings_, - ContextPtr context_) + const ContextPtr & context_) : WithContext(context_) , archive_info(archive_info_) - , format(format_) + , format(std::move(format_)) , format_settings(format_settings_) { } - std::pair, std::optional> next() override + Data next() override { /// For default mode check cached columns for all initial archive paths (maybe with globs) on first iteration. /// If we have cached columns, next() won't be called again. @@ -524,8 +583,8 @@ namespace { for (const auto & archive : archive_info.paths_to_archives) { - if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, archive_info.path_in_archive)) - return {nullptr, cached_columns}; + if (auto cached_schema = tryGetSchemaFromCache(archive, fmt::format("{}::{}", archive, archive_info.path_in_archive))) + return {nullptr, cached_schema, format}; } } @@ -535,12 +594,19 @@ namespace if (current_archive_index == archive_info.paths_to_archives.size()) { if (is_first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. You can specify table structure manually", + *format); - return {nullptr, std::nullopt}; + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because all files are empty. You can specify the format manually"); + } + + return {nullptr, std::nullopt, format}; } const auto & archive = archive_info.paths_to_archives[current_archive_index]; @@ -554,11 +620,18 @@ namespace continue; } + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because the archive {} is empty. " + "You can specify table structure manually", + *format, + archive); + throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because the archive {} is empty. " - "You must specify table structure manually", - format, + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because the archive {} is empty. 
" + "You can specify the format manually", archive); } @@ -574,8 +647,8 @@ namespace last_read_file_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), archive_info.path_in_archive)); is_first = false; - if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, last_read_file_path)) - return {nullptr, cached_columns}; + if (auto cached_schema = tryGetSchemaFromCache(archive, last_read_file_path)) + return {nullptr, cached_schema, format}; } else { @@ -611,13 +684,20 @@ namespace last_read_file_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), *filename)); is_first = false; - if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, last_read_file_path)) + /// If format is unknown we can try to determine it by the file name. + if (!format) + { + if (auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename)) + format = format_from_file; + } + + if (auto cached_schema = tryGetSchemaFromCache(archive, last_read_file_path)) { /// For union mode next() will be called again even if we found cached columns, /// so we need to remember last_read_buffer to continue iterating through files in archive. if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) last_read_buffer = archive_reader->readFile(std::move(file_enumerator)); - return {nullptr, cached_columns}; + return {nullptr, cached_schema, format}; } read_buf = archive_reader->readFile(std::move(file_enumerator)); @@ -626,7 +706,7 @@ namespace break; } - return {std::move(read_buf), std::nullopt}; + return {std::move(read_buf), std::nullopt, format}; } void setPreviousReadBuffer(std::unique_ptr buffer) override @@ -640,7 +720,7 @@ namespace if (!getContext()->getSettingsRef().use_cache_for_count_from_files) return; - auto key = getKeyForSchemaCache(last_read_file_path, format, format_settings, getContext()); + auto key = getKeyForSchemaCache(last_read_file_path, *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -653,7 +733,7 @@ namespace /// For union mode, schema can be different for different files in archive, so we need to /// cache last inferred schema only for last processed file. 
auto & schema_cache = StorageFile::getSchemaCache(getContext()); - auto cache_key = getKeyForSchemaCache(last_read_file_path, format, format_settings, getContext()); + auto cache_key = getKeyForSchemaCache(last_read_file_path, *format, format_settings, getContext()); schema_cache.addColumns(cache_key, columns); } @@ -669,17 +749,42 @@ namespace for (const auto & archive : archive_info.paths_to_archives) paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive, archive_info.path_in_archive)); auto & schema_cache = StorageFile::getSchemaCache(getContext()); - auto cache_keys = getKeysForSchemaCache(paths_for_schema_cache, format, format_settings, getContext()); + auto cache_keys = getKeysForSchemaCache(paths_for_schema_cache, *format, format_settings, getContext()); schema_cache.addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { return last_read_file_path; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + if (archive_info.isSingleFileRead()) + { + chassert(current_archive_index > 0 && current_archive_index <= archive_info.paths_to_archives.size()); + const auto & archive = archive_info.paths_to_archives[current_archive_index - 1]; + auto archive_reader = createArchiveReader(archive); + return archive_reader->readFile(archive_info.path_in_archive, false); + } + + chassert(current_archive_index >= 0 && current_archive_index < archive_info.paths_to_archives.size()); + const auto & archive = archive_info.paths_to_archives[current_archive_index]; + auto archive_reader = createArchiveReader(archive); + chassert(last_read_buffer); + file_enumerator = archive_reader->currentFile(std::move(last_read_buffer)); + return archive_reader->readFile(std::move(file_enumerator)); + } + private: - std::optional tryGetColumnsFromSchemaCache(const std::string & archive_path, const std::string & full_path) + std::optional tryGetSchemaFromCache(const std::string & archive_path, const std::string & full_path) { auto context = getContext(); if (!context->getSettingsRef().schema_inference_use_cache_for_file) @@ -695,11 +800,28 @@ namespace return file_stat.st_mtime; }; - auto cache_key = getKeyForSchemaCache(full_path, format, format_settings, context); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(full_path, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(full_path, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
+ format = format_name; + return columns; + } + } + } return std::nullopt; } @@ -715,13 +837,13 @@ namespace std::unique_ptr file_enumerator; std::unique_ptr last_read_buffer; - String format; + std::optional format; const std::optional & format_settings; std::vector paths_for_schema_cache; }; } -ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr context) +std::pair StorageFile::getTableStructureAndFormatFromFileDescriptor(std::optional format, const ContextPtr & context) { /// If we want to read schema from file descriptor we should create /// a read buffer from fd, create a checkpoint, read some data required @@ -738,22 +860,29 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c read_buf->setCheckpoint(); auto read_buffer_iterator = SingleReadBufferIterator(std::move(read_buf)); - auto columns = readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, false, context, peekable_read_buffer_from_fd); + ColumnsDescription columns; + if (format) + columns = readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context); + else + std::tie(columns, format) = detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + + peekable_read_buffer_from_fd = read_buffer_iterator.releaseBuffer(); if (peekable_read_buffer_from_fd) { /// If we have created read buffer in readSchemaFromFormat we should rollback to checkpoint. assert_cast(peekable_read_buffer_from_fd.get())->rollbackToCheckpoint(); has_peekable_read_buffer_from_fd = true; } - return columns; + + return {columns, *format}; } -ColumnsDescription StorageFile::getTableStructureFromFile( - const String & format, +std::pair StorageFile::getTableStructureAndFormatFromFileImpl( + std::optional format, const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context, + const ContextPtr & context, const std::optional & archive_info) { if (format == "Distributed") @@ -761,29 +890,60 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (paths.empty()) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Cannot get table structure from file, because no files match specified name"); - return ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()); + return {ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()), *format}; } if (((archive_info && archive_info->paths_to_archives.empty()) || (!archive_info && paths.empty())) - && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) + && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path. " + "You can specify table structure manually", *format); + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path. " - "You must specify table structure manually", format); + "The data format cannot be detected by the contents of the files, because there are no files with provided path. 
" + "You can specify the format manually"); + + } if (archive_info) { ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context); - return readSchemaFromFormat( - format, - format_settings, - read_buffer_iterator, - /*retry=*/archive_info->paths_to_archives.size() > 1 || !archive_info->isSingleFileRead(), - context); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); } ReadBufferFromFileIterator read_buffer_iterator(paths, format, compression_method, format_settings, context); - return readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); +} + +ColumnsDescription StorageFile::getTableStructureFromFile( + const DB::String & format, + const std::vector & paths, + const DB::String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, + const std::optional & archive_info) +{ + return getTableStructureAndFormatFromFileImpl(format, paths, compression_method, format_settings, context, archive_info).first; +} + +std::pair StorageFile::getTableStructureAndFormatFromFile( + const std::vector & paths, + const DB::String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, + const std::optional & archive_info) +{ + return getTableStructureAndFormatFromFileImpl(std::nullopt, paths, compression_method, format_settings, context, archive_info); } bool StorageFile::supportsSubsetOfColumns(const ContextPtr & context) const @@ -874,7 +1034,7 @@ StorageFile::StorageFile(CommonArguments args) , compression_method(args.compression_method) , base_path(args.getContext()->getPath()) { - if (format_name != "Distributed") + if (format_name != "Distributed" && format_name != "auto") FormatFactory::instance().checkFormatName(format_name); } @@ -886,16 +1046,19 @@ void StorageFile::setStorageMetadata(CommonArguments args) { ColumnsDescription columns; if (use_table_fd) - columns = getTableStructureFromFileDescriptor(args.getContext()); + { + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromFileDescriptor(std::nullopt, args.getContext()); + else + columns = getTableStructureAndFormatFromFileDescriptor(format_name, args.getContext()).first; + } else { - columns = getTableStructureFromFile( - format_name, - paths, - compression_method, - format_settings, - args.getContext(), - archive_info); + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromFile(paths, compression_method, format_settings, args.getContext(), archive_info); + else + columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), archive_info); + if (!args.columns.empty() && args.columns != columns) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } @@ -903,6 +1066,8 @@ void StorageFile::setStorageMetadata(CommonArguments args) } else { + if (format_name == "auto") + format_name = getTableStructureAndFormatFromFile(paths, compression_method, format_settings, args.getContext(), archive_info).second; /// We don't allow special columns in File storage. 
if (!args.columns.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine File doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -917,7 +1082,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) } -static std::chrono::seconds getLockTimeout(ContextPtr context) +static std::chrono::seconds getLockTimeout(const ContextPtr & context) { const Settings & settings = context->getSettingsRef(); Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); @@ -933,9 +1098,9 @@ StorageFileSource::FilesIterator::FilesIterator( std::optional archive_info_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, - ContextPtr context_, + const ContextPtr & context_, bool distributed_processing_) - : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_) + : WithContext(context_), files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_) { ActionsDAGPtr filter_dag; if (!distributed_processing && !archive_info && !files.empty()) @@ -948,7 +1113,7 @@ StorageFileSource::FilesIterator::FilesIterator( String StorageFileSource::FilesIterator::next() { if (distributed_processing) - return context->getReadTaskCallback()(); + return getContext()->getReadTaskCallback()(); else { const auto & fs = isReadFromArchive() ? archive_info->paths_to_archives : files; @@ -972,12 +1137,12 @@ const String & StorageFileSource::FilesIterator::getFileNameInArchive() StorageFileSource::StorageFileSource( const ReadFromFormatInfo & info, std::shared_ptr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_, bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) + : SourceWithKeyCondition(info.source_header, false), WithContext(context_) , storage(std::move(storage_)) , files_iterator(std::move(files_iterator_)) , read_buf(std::move(read_buf_)) @@ -985,13 +1150,12 @@ StorageFileSource::StorageFileSource( , requested_columns(info.requested_columns) , requested_virtual_columns(info.requested_virtual_columns) , block_for_format(info.format_header) - , context(context_) , max_block_size(max_block_size_) , need_only_count(need_only_count_) { if (!storage->use_table_fd) { - shared_lock = std::shared_lock(storage->rwlock, getLockTimeout(context)); + shared_lock = std::shared_lock(storage->rwlock, getLockTimeout(getContext())); if (!shared_lock) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); storage->readers_counter.fetch_add(1, std::memory_order_release); @@ -1008,7 +1172,7 @@ void StorageFileSource::beforeDestroy() if (std::uncaught_exceptions() == 0 && cnt == 1 && !storage->was_renamed) { shared_lock.unlock(); - auto exclusive_lock = std::unique_lock{storage->rwlock, getLockTimeout(context)}; + auto exclusive_lock = std::unique_lock{storage->rwlock, getLockTimeout(getContext())}; if (!exclusive_lock) return; @@ -1027,7 +1191,7 @@ void StorageFileSource::beforeDestroy() file_path = file_path.lexically_normal(); // Checking access rights - checkCreationIsAllowed(context, context->getUserFilesPath(), file_path, true); + checkCreationIsAllowed(getContext(), getContext()->getUserFilesPath(), file_path, true); // Checking an existing of new file if (fs::exists(file_path)) @@ -1060,7 +1224,7 @@ void StorageFileSource::setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nod bool 
StorageFileSource::tryGetCountFromCache(const struct stat & file_stat) { - if (!context->getSettingsRef().use_cache_for_count_from_files) + if (!getContext()->getSettingsRef().use_cache_for_count_from_files) return false; auto num_rows_from_cache = tryGetNumRowsFromCache(current_path, file_stat.st_mtime); @@ -1102,7 +1266,7 @@ Chunk StorageFileSource::generate() return {}; auto file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); - if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + if (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; archive_reader = createArchiveReader(archive); @@ -1116,7 +1280,7 @@ Chunk StorageFileSource::generate() if (!read_buf) continue; - if (auto progress_callback = context->getFileProgressCallback()) + if (auto progress_callback = getContext()->getFileProgressCallback()) progress_callback(FileProgress(0, tryGetFileSizeFromReadBuffer(*read_buf).value_or(0))); } else @@ -1130,7 +1294,7 @@ Chunk StorageFileSource::generate() return {}; current_archive_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); - if (context->getSettingsRef().engine_file_skip_empty_files && current_archive_stat.st_size == 0) + if (getContext()->getSettingsRef().engine_file_skip_empty_files && current_archive_stat.st_size == 0) continue; archive_reader = createArchiveReader(archive); @@ -1164,7 +1328,7 @@ Chunk StorageFileSource::generate() continue; read_buf = archive_reader->readFile(std::move(file_enumerator)); - if (auto progress_callback = context->getFileProgressCallback()) + if (auto progress_callback = getContext()->getFileProgressCallback()) progress_callback(FileProgress(0, tryGetFileSizeFromReadBuffer(*read_buf).value_or(0))); } } @@ -1190,16 +1354,16 @@ Chunk StorageFileSource::generate() file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); current_file_size = file_stat.st_size; - if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + if (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; if (need_only_count && tryGetCountFromCache(file_stat)) continue; - read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); + read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, getContext()); } - const Settings & settings = context->getSettingsRef(); + const Settings & settings = getContext()->getSettingsRef(); size_t file_num = 0; if (storage->archive_info) @@ -1211,7 +1375,7 @@ Chunk StorageFileSource::generate() const auto max_parsing_threads = std::max(settings.max_threads / file_num, 1UL); input_format = FormatFactory::instance().getInput( - storage->format_name, *read_buf, block_for_format, context, max_block_size, storage->format_settings, + storage->format_name, *read_buf, block_for_format, getContext(), max_block_size, storage->format_settings, max_parsing_threads, std::nullopt, /*is_remote_fs*/ false, CompressionMethod::None, need_only_count); if (key_condition) @@ -1227,7 +1391,7 @@ Chunk StorageFileSource::generate() { builder.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, columns_description, *input_format, context); + return std::make_shared(header, columns_description, *input_format, getContext()); }); } @@ 
-1264,7 +1428,7 @@ Chunk StorageFileSource::generate() if (storage->use_table_fd) finished_generate = true; - if (input_format && storage->format_name != "Distributed" && context->getSettingsRef().use_cache_for_count_from_files) + if (input_format && storage->format_name != "Distributed" && getContext()->getSettingsRef().use_cache_for_count_from_files) addNumRowsToCache(current_path, total_rows_in_file); total_rows_in_file = 0; @@ -1295,14 +1459,14 @@ Chunk StorageFileSource::generate() void StorageFileSource::addNumRowsToCache(const String & path, size_t num_rows) const { - auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, context); - StorageFile::getSchemaCache(context).addNumRows(key, num_rows); + auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, getContext()); + StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); } std::optional StorageFileSource::tryGetNumRowsFromCache(const String & path, time_t last_mod_time) const { - auto & schema_cache = StorageFile::getSchemaCache(context); - auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, context); + auto & schema_cache = StorageFile::getSchemaCache(getContext()); + auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, getContext()); auto get_last_mod_time = [&]() -> std::optional { return last_mod_time; @@ -1311,7 +1475,7 @@ std::optional StorageFileSource::tryGetNumRowsFromCache(const String & p return schema_cache.tryGetNumRows(key, get_last_mod_time); } -class ReadFromFile : public SourceStepWithFilter +class ReadFromFile : public SourceStepWithFilter, WithContext { public: std::string getName() const override { return "ReadFromFile"; } @@ -1323,14 +1487,13 @@ public: std::shared_ptr storage_, ReadFromFormatInfo info_, const bool need_only_count_, - ContextPtr context_, + const ContextPtr & context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}), WithContext(context_) , storage(std::move(storage_)) , info(std::move(info_)) , need_only_count(need_only_count_) - , context(std::move(context_)) , max_block_size(max_block_size_) , max_num_streams(num_streams_) { @@ -1341,7 +1504,6 @@ private: ReadFromFormatInfo info; const bool need_only_count; - ContextPtr context; size_t max_block_size; const size_t max_num_streams; @@ -1352,7 +1514,7 @@ private: void ReadFromFile::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, getContext()); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1422,7 +1584,7 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate) storage->archive_info, predicate, storage->virtual_columns, - context, + getContext(), storage->distributed_processing); } @@ -1444,8 +1606,10 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui Pipes pipes; pipes.reserve(num_streams); + auto ctx = getContext(); + /// Set total number of bytes to process. For progress bar. 
- auto progress_callback = context->getFileProgressCallback(); + auto progress_callback = ctx->getFileProgressCallback(); if (progress_callback && !storage->archive_info) progress_callback(FileProgress(0, storage->total_bytes_to_read)); @@ -1463,20 +1627,20 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui auto source = std::make_shared( info, storage, - context, + ctx, max_block_size, files_iterator, std::move(read_buffer), need_only_count); - source->setKeyCondition(filter_nodes.nodes, context); + source->setKeyCondition(filter_nodes.nodes, ctx); pipes.emplace_back(std::move(source)); } auto pipe = Pipe::unitePipes(std::move(pipes)); size_t output_ports = pipe.numOutputPorts(); - const bool parallelize_output = context->getSettingsRef().parallelize_output_from_storages; - if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < max_num_streams) + const bool parallelize_output = ctx->getSettingsRef().parallelize_output_from_storages; + if (parallelize_output && storage->parallelizeOutputAfterReading(ctx) && output_ports > 0 && output_ports < max_num_streams) pipe.resize(max_num_streams); if (pipe.empty()) @@ -1489,7 +1653,7 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui } -class StorageFileSink final : public SinkToStorage +class StorageFileSink final : public SinkToStorage, WithContext { public: StorageFileSink( @@ -1502,9 +1666,9 @@ public: const CompressionMethod compression_method_, const std::optional & format_settings_, const String format_name_, - ContextPtr context_, + const ContextPtr & context_, int flags_) - : SinkToStorage(metadata_snapshot_->getSampleBlock()) + : SinkToStorage(metadata_snapshot_->getSampleBlock()), WithContext(context_) , metadata_snapshot(metadata_snapshot_) , table_name_for_log(table_name_for_log_) , table_fd(table_fd_) @@ -1514,7 +1678,6 @@ public: , compression_method(compression_method_) , format_name(format_name_) , format_settings(format_settings_) - , context(context_) , flags(flags_) { initialize(); @@ -1531,9 +1694,9 @@ public: const CompressionMethod compression_method_, const std::optional & format_settings_, const String format_name_, - ContextPtr context_, + const ContextPtr & context_, int flags_) - : SinkToStorage(metadata_snapshot_->getSampleBlock()) + : SinkToStorage(metadata_snapshot_->getSampleBlock()), WithContext(context_) , metadata_snapshot(metadata_snapshot_) , table_name_for_log(table_name_for_log_) , table_fd(table_fd_) @@ -1543,7 +1706,6 @@ public: , compression_method(compression_method_) , format_name(format_name_) , format_settings(format_settings_) - , context(context_) , flags(flags_) , lock(std::move(lock_)) { @@ -1567,7 +1729,7 @@ public: /// In case of formats with prefixes if file is not empty we have already written prefix. 
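/// ("Prefix" here means whatever a format emits before the first row, e.g. the column-names header of CSVWithNames or the opening of the JSON output format; when appending to a non-empty file it must not be written again, which is what the size() check below and writer->doNotWritePrefix() ensure.)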
bool do_not_write_prefix = naked_buffer->size(); - const auto & settings = context->getSettingsRef(); + const auto & settings = getContext()->getSettingsRef(); write_buf = wrapWriteBufferWithCompressionMethod( std::move(naked_buffer), compression_method, @@ -1575,7 +1737,7 @@ public: static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, - *write_buf, metadata_snapshot->getSampleBlock(), context, format_settings); + *write_buf, metadata_snapshot->getSampleBlock(), getContext(), format_settings); if (do_not_write_prefix) writer->doNotWritePrefix(); @@ -1658,7 +1820,6 @@ private: std::string format_name; std::optional format_settings; - ContextPtr context; int flags; std::unique_lock lock; @@ -2043,7 +2204,7 @@ StorageFile::ArchiveInfo StorageFile::getArchiveInfo( const std::string & path_to_archive, const std::string & file_in_archive, const std::string & user_files_path, - ContextPtr context, + const ContextPtr & context, size_t & total_bytes_to_read ) { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index b74868597a6..a5ccbc8f506 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -84,7 +84,7 @@ public: static Names getVirtualColumnNames(); - static Strings getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read); + static Strings getPathsList(const String & table_path, const String & user_files_path, const ContextPtr & context, size_t & total_bytes_to_read); /// Check if the format supports reading only some subset of columns. /// Is is useful because such formats could effectively skip unknown columns @@ -112,14 +112,19 @@ public: } }; - ColumnsDescription getTableStructureFromFileDescriptor(ContextPtr context); - static ColumnsDescription getTableStructureFromFile( const String & format, const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context, + const ContextPtr & context, + const std::optional & archive_info = std::nullopt); + + static std::pair getTableStructureAndFormatFromFile( + const std::vector & paths, + const String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, const std::optional & archive_info = std::nullopt); static SchemaCache & getSchemaCache(const ContextPtr & context); @@ -130,7 +135,7 @@ public: const std::string & path_to_archive, const std::string & file_in_archive, const std::string & user_files_path, - ContextPtr context, + const ContextPtr & context, size_t & total_bytes_to_read); bool supportsTrivialCountOptimization() const override { return true; } @@ -141,6 +146,16 @@ protected: friend class ReadFromFile; private: + std::pair getTableStructureAndFormatFromFileDescriptor(std::optional format, const ContextPtr & context); + + static std::pair getTableStructureAndFormatFromFileImpl( + std::optional format, + const std::vector & paths, + const String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, + const std::optional & archive_info = std::nullopt); + void setStorageMetadata(CommonArguments args); std::string format_name; @@ -187,10 +202,10 @@ private: bool distributed_processing = false; }; -class StorageFileSource : public SourceWithKeyCondition +class StorageFileSource : public SourceWithKeyCondition, WithContext { public: - class FilesIterator + class FilesIterator : WithContext { public: explicit 
FilesIterator( @@ -198,7 +213,7 @@ public: std::optional archive_info_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, - ContextPtr context_, + const ContextPtr & context_, bool distributed_processing_ = false); String next(); @@ -227,8 +242,6 @@ private: std::atomic index = 0; bool distributed_processing; - - ContextPtr context; }; using FilesIteratorPtr = std::shared_ptr; @@ -236,7 +249,7 @@ private: StorageFileSource( const ReadFromFormatInfo & info, std::shared_ptr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_, @@ -286,7 +299,6 @@ private: NamesAndTypesList requested_virtual_columns; Block block_for_format; - ContextPtr context; /// TODO Untangle potential issues with context lifetime. UInt64 max_block_size; bool finished_generate = false; diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index c12124f1e07..65eec0a7ea1 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -25,36 +25,39 @@ extern const int LOGICAL_ERROR; } StorageFileCluster::StorageFileCluster( - ContextPtr context_, + const ContextPtr & context, const String & cluster_name_, const String & filename_, const String & format_name_, - const String & compression_method_, + const String & compression_method, const StorageID & table_id_, const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageFileCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const ConstraintsDescription & constraints_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageFileCluster (" + table_id_.table_name + ")")) , filename(filename_) , format_name(format_name_) - , compression_method(compression_method_) { StorageInMemoryMetadata storage_metadata; size_t total_bytes_to_read; // its value isn't used as we are not reading files (just listing them). 
But it is required by getPathsList - paths = StorageFile::getPathsList(filename_, context_->getUserFilesPath(), context_, total_bytes_to_read); + paths = StorageFile::getPathsList(filename_, context->getUserFilesPath(), context, total_bytes_to_read); if (columns_.empty()) { - auto columns = StorageFile::getTableStructureFromFile(format_name, - paths, - compression_method, - std::nullopt, - context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = StorageFile::getTableStructureAndFormatFromFile(paths, compression_method, std::nullopt, context); + else + columns = StorageFile::getTableStructureFromFile(format_name, paths, compression_method, std::nullopt, context); + storage_metadata.setColumns(columns); } else + { + if (format_name == "auto") + format_name = StorageFile::getTableStructureAndFormatFromFile(paths, compression_method, std::nullopt, context).second; storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); @@ -62,13 +65,14 @@ StorageFileCluster::StorageFileCluster( virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } -void StorageFileCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function fileCluster, got '{}'", queryToString(query)); - TableFunctionFileCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionFileCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); } RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index a6e57c3bb4f..2803c8b6e5b 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -17,15 +17,14 @@ class StorageFileCluster : public IStorageCluster { public: StorageFileCluster( - ContextPtr context_, + const ContextPtr & context_, const String & cluster_name_, const String & filename_, const String & format_name_, const String & compression_method_, const StorageID & table_id_, const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - bool structure_argument_was_provided_); + const ConstraintsDescription & constraints_); std::string getName() const override { return "FileCluster"; } @@ -38,12 +37,11 @@ public: bool supportsTrivialCountOptimization() const override { return true; } private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; Strings paths; String filename; String format_name; - String compression_method; NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index c376af5a3d7..8e5b6040a63 100644 --- 
a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -129,6 +129,7 @@ namespace ErrorCodes extern const int UNEXPECTED_EXPRESSION; extern const int DATABASE_ACCESS_DENIED; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; extern const int NOT_IMPLEMENTED; extern const int CANNOT_COMPILE_REGEXP; extern const int FILE_DOESNT_EXIST; @@ -428,7 +429,7 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const S3::URI & globbed_uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context, + const ContextPtr & context, KeysWithInfo * read_keys_, const S3Settings::RequestSettings & request_settings_, std::function file_progress_callback_) @@ -563,7 +564,7 @@ StorageS3Source::StorageS3Source( const ReadFromFormatInfo & info, const String & format_, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, @@ -841,7 +842,7 @@ public: StorageS3Sink( const String & format, const Block & sample_block_, - ContextPtr context, + const ContextPtr & context, std::optional format_settings_, const CompressionMethod compression_method, const StorageS3::Configuration & configuration_, @@ -949,23 +950,22 @@ private: }; -class PartitionedStorageS3Sink : public PartitionedSink +class PartitionedStorageS3Sink : public PartitionedSink, WithContext { public: PartitionedStorageS3Sink( const ASTPtr & partition_by, const String & format_, const Block & sample_block_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, const CompressionMethod compression_method_, const StorageS3::Configuration & configuration_, const String & bucket_, const String & key_) - : PartitionedSink(partition_by, context_, sample_block_) + : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) , format(format_) , sample_block(sample_block_) - , context(context_) , compression_method(compression_method_) , configuration(configuration_) , bucket(bucket_) @@ -985,7 +985,7 @@ public: return std::make_shared( format, sample_block, - context, + getContext(), format_settings, compression_method, configuration, @@ -997,7 +997,6 @@ public: private: const String format; const Block sample_block; - const ContextPtr context; const CompressionMethod compression_method; const StorageS3::Configuration configuration; const String bucket; @@ -1033,7 +1032,7 @@ private: StorageS3::StorageS3( const Configuration & configuration_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -1050,18 +1049,27 @@ StorageS3::StorageS3( { updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - FormatFactory::instance().checkFormatName(configuration.format); + if (configuration.format != "auto") + FormatFactory::instance().checkFormatName(configuration.format); context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromDataImpl(configuration, format_settings, context_); + ColumnsDescription columns; + if (configuration.format == "auto") + std::tie(columns, configuration.format) = 
getTableStructureAndFormatFromData(configuration, format_settings, context_); + else + columns = getTableStructureFromData(configuration, format_settings, context_); + storage_metadata.setColumns(columns); } else { + if (configuration.format == "auto") + configuration.format = getTableStructureAndFormatFromData(configuration, format_settings, context_).second; + /// We don't allow special columns in S3 storage. if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -1350,14 +1358,14 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, LOG_WARNING(&Poco::Logger::get("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); } -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(ContextPtr local_context) +StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(const ContextPtr & local_context) { std::lock_guard lock(configuration_update_mutex); configuration.update(local_context); return configuration; } -void StorageS3::updateConfiguration(ContextPtr local_context) +void StorageS3::updateConfiguration(const ContextPtr & local_context) { std::lock_guard lock(configuration_update_mutex); configuration.update(local_context); @@ -1375,7 +1383,7 @@ const StorageS3::Configuration & StorageS3::getConfiguration() return configuration; } -bool StorageS3::Configuration::update(ContextPtr context) +bool StorageS3::Configuration::update(const ContextPtr & context) { auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); request_settings = s3_settings.request_settings; @@ -1390,7 +1398,7 @@ bool StorageS3::Configuration::update(ContextPtr context) return true; } -void StorageS3::Configuration::connect(ContextPtr context) +void StorageS3::Configuration::connect(const ContextPtr & context) { const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); const Settings & local_settings = context->getSettingsRef(); @@ -1462,7 +1470,7 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur configuration.request_settings = S3Settings::RequestSettings(collection); } -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file) +StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) { StorageS3::Configuration configuration; @@ -1601,7 +1609,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context configuration.keys = {configuration.url.key}; if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.key, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.url.key).value_or("auto"); return configuration; } @@ -1609,9 +1617,17 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context ColumnsDescription StorageS3::getTableStructureFromData( const StorageS3::Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx) + const ContextPtr & ctx) { - return getTableStructureFromDataImpl(configuration, format_settings, ctx); + return getTableStructureAndFormatFromDataImpl(configuration.format, configuration, format_settings, ctx).first; +} + +std::pair 
StorageS3::getTableStructureAndFormatFromData( + const StorageS3::Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); } namespace @@ -1623,24 +1639,43 @@ namespace std::shared_ptr file_iterator_, const StorageS3Source::KeysWithInfo & read_keys_, const StorageS3::Configuration & configuration_, + std::optional format_, const std::optional & format_settings_, const ContextPtr & context_) : WithContext(context_) , file_iterator(file_iterator_) , read_keys(read_keys_) , configuration(configuration_) + , format(std::move(format_)) , format_settings(format_settings_) , prev_read_keys_size(read_keys_.size()) { } - std::pair, std::optional> next() override + Data next() override { - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (first) { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & key_with_info : read_keys) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->key)) + { + format = format_from_file_name; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns, format}; + } } while (true) @@ -1650,13 +1685,34 @@ namespace if (!current_key_with_info || current_key_with_info->key.empty()) { if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in S3 or all files are empty. You must specify table structure manually", - configuration.format); + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "in S3 or all files are empty. You can specify table structure manually", + *format); - return {nullptr, std::nullopt}; + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "in S3 or all files are empty. You can specify the format manually"); + } + + return {nullptr, std::nullopt, format}; + } + + /// S3 file iterator could get new keys after new iteration, if format is unknown we can try to determine it by new file names. + if (!format && read_keys.size() > prev_read_keys_size) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->key)) + { + format = format_from_file_name; + break; + } + } } /// S3 file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. 
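/// (Globbed S3 listings are expanded lazily, so each call to the file iterator may append newly discovered keys to read_keys; only the tail that appeared since the previous call - tracked by prev_read_keys_size - is probed against the schema cache and checked for a format-revealing file name.)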
@@ -1665,9 +1721,11 @@ namespace auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); prev_read_keys_size = read_keys.size(); if (columns_from_cache) - return {nullptr, columns_from_cache}; + return {nullptr, columns_from_cache, format}; } + prev_read_keys_size = read_keys.size(); + if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) continue; @@ -1678,7 +1736,7 @@ namespace if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) { first = false; - return {nullptr, columns_from_cache}; + return {nullptr, columns_from_cache, format}; } } @@ -1687,7 +1745,7 @@ namespace if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) { first = false; - return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt}; + return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt, format}; } } } @@ -1698,7 +1756,7 @@ namespace return; String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1709,7 +1767,7 @@ namespace return; String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); } @@ -1723,10 +1781,15 @@ namespace Strings sources; sources.reserve(read_keys.size()); std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); + auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { if (current_key_with_info) @@ -1734,15 +1797,26 @@ namespace return ""; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_key_with_info); + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); + return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); + } + private: std::optional tryGetColumnsFromCache( const 
StorageS3::KeysWithInfo::const_iterator & begin, const StorageS3::KeysWithInfo::const_iterator & end) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_s3) return std::nullopt; - auto & schema_cache = StorageS3::getSchemaCache(getContext()); + auto & schema_cache = StorageS3::getSchemaCache(context); for (auto it = begin; it < end; ++it) { auto get_last_mod_time = [&] @@ -1773,10 +1847,29 @@ namespace String path = fs::path(configuration.url.bucket) / (*it)->key; String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + + if (format) + { + auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -1785,6 +1878,7 @@ namespace std::shared_ptr file_iterator; const StorageS3Source::KeysWithInfo & read_keys; const StorageS3::Configuration & configuration; + std::optional format; const std::optional & format_settings; StorageS3Source::KeyWithInfoPtr current_key_with_info; size_t prev_read_keys_size; @@ -1793,17 +1887,20 @@ namespace } -ColumnsDescription StorageS3::getTableStructureFromDataImpl( +std::pair StorageS3::getTableStructureAndFormatFromDataImpl( + std::optional format, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx) + const ContextPtr & ctx) { KeysWithInfo read_keys; auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); + ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format, format_settings, ctx); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); } void registerStorageS3Impl(const String & name, StorageFactory & factory) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index b90a0d394cb..cb3c3f4b947 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -80,7 +80,7 @@ public: const S3::URI & globbed_uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, - ContextPtr context, + const ContextPtr & context, KeysWithInfo * read_keys_ = nullptr, const S3Settings::RequestSettings & request_settings_ = 
{}, std::function progress_callback_ = {}); @@ -134,7 +134,7 @@ public: const ReadFromFormatInfo & info, const String & format, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, @@ -280,9 +280,9 @@ public: String getPath() const { return url.key; } - bool update(ContextPtr context); + bool update(const ContextPtr & context); - void connect(ContextPtr context); + void connect(const ContextPtr & context); bool withGlobs() const { return url.key.find_first_of("*?{") != std::string::npos; } @@ -308,7 +308,7 @@ public: StorageS3( const Configuration & configuration_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -345,21 +345,26 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & ctx); - static StorageS3::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file = true); + static StorageS3::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true); static ColumnsDescription getTableStructureFromData( const StorageS3::Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx); + const ContextPtr & ctx); + + static std::pair getTableStructureAndFormatFromData( + const StorageS3::Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx); using KeysWithInfo = StorageS3Source::KeysWithInfo; bool supportsTrivialCountOptimization() const override { return true; } protected: - virtual Configuration updateConfigurationAndGetCopy(ContextPtr local_context); + virtual Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context); - virtual void updateConfiguration(ContextPtr local_context); + virtual void updateConfiguration(const ContextPtr & local_context); void useConfiguration(const Configuration & new_configuration); @@ -380,10 +385,11 @@ private: std::optional format_settings; ASTPtr partition_by; - static ColumnsDescription getTableStructureFromDataImpl( + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx); + const ContextPtr & ctx); bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index e1738056e9d..5264372889e 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -38,25 +38,34 @@ StorageS3Cluster::StorageS3Cluster( const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const ContextPtr & context) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageS3Cluster (" + table_id_.table_name + ")")) , s3_configuration{configuration_} { - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); + context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); + 
context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); StorageInMemoryMetadata storage_metadata; - updateConfigurationIfChanged(context_); + updateConfigurationIfChanged(context); if (columns_.empty()) { + ColumnsDescription columns; /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function - auto columns = StorageS3::getTableStructureFromDataImpl(s3_configuration, /*format_settings=*/std::nullopt, context_); + if (s3_configuration.format == "auto") + std::tie(columns, s3_configuration.format) = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context); + else + columns = StorageS3::getTableStructureFromData(s3_configuration, /*format_settings=*/std::nullopt, context); + storage_metadata.setColumns(columns); } else + { + if (s3_configuration.format == "auto") + s3_configuration.format = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context).second; + storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); @@ -64,13 +73,17 @@ StorageS3Cluster::StorageS3Cluster( virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } -void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageS3Cluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - TableFunctionS3Cluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionS3Cluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, + storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), + s3_configuration.format, + context); } void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index c526f14834a..ac25c506337 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -27,8 +27,7 @@ public: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); + const ContextPtr & context_); std::string getName() const override { return "S3Cluster"; } @@ -46,7 +45,7 @@ protected: private: void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); } - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; StorageS3::Configuration s3_configuration; NamesAndTypesList virtual_columns; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0ba72af6fc0..a68ed6965fc 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -101,7 +101,7 @@ static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) IStorageURLBase::IStorageURLBase( const String & 
uri_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, @@ -123,16 +123,26 @@ IStorageURLBase::IStorageURLBase( , partition_by(partition_by_) , distributed_processing(distributed_processing_) { - FormatFactory::instance().checkFormatName(format_name); + if (format_name != "auto") + FormatFactory::instance().checkFormatName(format_name); + StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromData(format_name, uri, compression_method, headers, format_settings, context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri, compression_method, headers, format_settings, context_); + else + columns = getTableStructureFromData(format_name, uri, compression_method, headers, format_settings, context_); + storage_metadata.setColumns(columns); } else { + if (format_name == "auto") + format_name = getTableStructureAndFormatFromData(uri, compression_method, headers, format_settings, context_).second; + /// We don't allow special columns in URL storage. if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine URL doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -257,7 +267,7 @@ StorageURLSource::StorageURLSource( const String & format_, const std::optional & format_settings_, String name_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -525,7 +535,7 @@ StorageURLSink::StorageURLSink( const String & format, const std::optional & format_settings, const Block & sample_block, - ContextPtr context, + const ContextPtr & context, const ConnectionTimeouts & timeouts, const CompressionMethod compression_method, const HTTPHeaderEntries & headers, @@ -668,7 +678,7 @@ std::vector> IStorageURLBase::getReadURIPara const Names & /*column_names*/, const StorageSnapshotPtr & /*storage_snapshot*/, const SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + const ContextPtr & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { @@ -679,7 +689,7 @@ std::function IStorageURLBase::getReadPOSTDataCallback( const Names & /*column_names*/, const ColumnsDescription & /* columns_description */, const SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + const ContextPtr & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { @@ -693,28 +703,48 @@ namespace public: ReadBufferIterator( const std::vector & urls_to_check_, - const String & format_, + std::optional format_, const CompressionMethod & compression_method_, const HTTPHeaderEntries & headers_, const std::optional & format_settings_, const ContextPtr & context_) - : WithContext(context_), format(format_), compression_method(compression_method_), headers(headers_), format_settings(format_settings_) + : WithContext(context_), format(std::move(format_)), compression_method(compression_method_), headers(headers_), format_settings(format_settings_) { url_options_to_check.reserve(urls_to_check_.size()); for (const auto & url : urls_to_check_) url_options_to_check.push_back(getFailoverOptions(url, getContext()->getSettingsRef().glob_expansion_max_elements)); } - std::pair, std::optional> next() override + Data next() override { bool is_first = (current_index == 0); - /// For default 
mode check cached columns for all urls on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (is_first) { - for (const auto & options : url_options_to_check) + /// If format is unknown we iterate through all url options on first iteration and + /// try to determine format by file name. + if (!format) { - if (auto cached_columns = tryGetColumnsFromCache(options)) - return {nullptr, cached_columns}; + for (const auto & options : url_options_to_check) + { + for (const auto & url : options) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url)) + { + format = format_from_file_name; + break; + } + } + } + } + + /// For default mode check cached columns for all urls on first iteration. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + for (const auto & options : url_options_to_check) + { + if (auto cached_columns = tryGetColumnsFromCache(options)) + return {nullptr, cached_columns, format}; + } } } @@ -724,20 +754,30 @@ namespace if (current_index == url_options_to_check.size()) { if (is_first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. " + "You can specify table structure manually", + *format); + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. " - "You must specify table structure manually", - format); - return {nullptr, std::nullopt}; + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "You can specify the format manually"); + + } + + return {nullptr, std::nullopt, format}; } if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { - if (auto cached_columns = tryGetColumnsFromCache(url_options_to_check[current_index])) + if (auto cached_schema = tryGetColumnsFromCache(url_options_to_check[current_index])) { ++current_index; - return {nullptr, cached_columns}; + return {nullptr, cached_schema, format}; } } @@ -762,7 +802,7 @@ namespace return {wrapReadBufferWithCompressionMethod( std::move(uri_and_buf.second), compression_method, - static_cast(getContext()->getSettingsRef().zstd_window_log_max)), std::nullopt}; + static_cast(getContext()->getSettingsRef().zstd_window_log_max)), std::nullopt, format}; } void setNumRowsToLastFile(size_t num_rows) override @@ -770,7 +810,7 @@ namespace if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url) return; - auto key = getKeyForSchemaCache(current_url_option, format, format_settings, getContext()); + auto key = getKeyForSchemaCache(current_url_option, *format, format_settings, getContext()); StorageURL::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -780,7 +820,7 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - auto key = getKeyForSchemaCache(current_url_option, format, format_settings, getContext()); + auto key = getKeyForSchemaCache(current_url_option, *format, format_settings, getContext()); StorageURL::getSchemaCache(getContext()).addColumns(key, columns); } @@ -792,17 +832,45 @@ namespace for (const auto & options : url_options_to_check) { - auto keys = getKeysForSchemaCache(options, format, format_settings, getContext()); + auto keys = getKeysForSchemaCache(options, *format, 
format_settings, getContext()); StorageURL::getSchemaCache(getContext()).addManyColumns(keys, columns); } } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { return current_url_option; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_index > 0 && current_index <= url_options_to_check.size()); + auto first_option = url_options_to_check[current_index - 1].cbegin(); + auto uri_and_buf = StorageURLSource::getFirstAvailableURIAndReadBuffer( + first_option, + url_options_to_check[current_index - 1].cend(), + getContext(), + {}, + Poco::Net::HTTPRequest::HTTP_GET, + {}, + getHTTPTimeouts(getContext()), + credentials, + headers, + false, + false); + + return wrapReadBufferWithCompressionMethod(std::move(uri_and_buf.second), compression_method, static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + } + private: std::optional tryGetColumnsFromCache(const Strings & urls) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url) + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_url) return std::nullopt; auto & schema_cache = StorageURL::getSchemaCache(getContext()); @@ -810,7 +878,7 @@ namespace { auto get_last_mod_time = [&]() -> std::optional { - auto last_mod_time = StorageURL::tryGetLastModificationTime(url, headers, credentials, getContext()); + auto last_mod_time = StorageURL::tryGetLastModificationTime(url, headers, credentials, context); /// Some URLs could not have Last-Modified header, in this case we cannot be sure that /// data wasn't changed after adding it's schema to cache. Use schema from cache only if /// special setting for this case is enabled. @@ -819,10 +887,27 @@ namespace return last_mod_time; }; - auto cache_key = getKeyForSchemaCache(url, format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(url, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. 
+ for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(url, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -831,7 +916,7 @@ namespace std::vector> url_options_to_check; size_t current_index = 0; String current_url_option; - const String & format; + std::optional format; const CompressionMethod & compression_method; const HTTPHeaderEntries & headers; Poco::Net::HTTPBasicCredentials credentials; @@ -839,13 +924,13 @@ namespace }; } -ColumnsDescription IStorageURLBase::getTableStructureFromData( - const String & format, +std::pair IStorageURLBase::getTableStructureAndFormatFromDataImpl( + std::optional format, const String & uri, CompressionMethod compression_method, const HTTPHeaderEntries & headers, const std::optional & format_settings, - ContextPtr context) + const ContextPtr & context) { context->getRemoteHostFilter().checkURL(Poco::URI(uri)); @@ -858,7 +943,30 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( urls_to_check = {uri}; ReadBufferIterator read_buffer_iterator(urls_to_check, format, compression_method, headers, format_settings, context); - return readSchemaFromFormat(format, format_settings, read_buffer_iterator, urls_to_check.size() > 1, context); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); +} + +ColumnsDescription IStorageURLBase::getTableStructureFromData( + const String & format, + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context) +{ + return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, headers, format_settings, context).first; +} + +std::pair IStorageURLBase::getTableStructureAndFormatFromData( + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, headers, format_settings, context); } bool IStorageURLBase::supportsSubsetOfColumns(const ContextPtr & context) const @@ -1243,7 +1351,7 @@ StorageURL::StorageURL( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_, const HTTPHeaderEntries & headers_, const String & http_method_, @@ -1276,7 +1384,7 @@ StorageURLWithFailover::StorageURLWithFailover( const std::optional & format_settings_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_) : StorageURL("", table_id_, format_name_, format_settings_, columns_, constraints_, String{}, context_, compression_method_) { @@ -1325,7 +1433,7 @@ FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Argum } size_t StorageURL::evalArgsAndCollectHeaders( - ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context) + ASTs & url_function_args, HTTPHeaderEntries & header_entries, const ContextPtr & context) { ASTs::iterator headers_it = 
url_function_args.end(); @@ -1409,7 +1517,7 @@ void StorageURL::processNamedCollectionResult(Configuration & configuration, con configuration.structure = collection.getOrDefault("structure", "auto"); } -StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, ContextPtr local_context) +StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, const ContextPtr & local_context) { StorageURL::Configuration configuration; @@ -1433,7 +1541,7 @@ StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, ContextPtr l } if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(configuration.url).getPath(), true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url).getPath()).value_or("auto"); for (const auto & [header, value] : configuration.headers) { diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index c8b8d0942f4..18a90c7bb82 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -57,7 +57,15 @@ public: CompressionMethod compression_method, const HTTPHeaderEntries & headers, const std::optional & format_settings, - ContextPtr context); + const ContextPtr & context); + + static std::pair getTableStructureAndFormatFromData( + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context); + static SchemaCache & getSchemaCache(const ContextPtr & context); @@ -72,7 +80,7 @@ protected: IStorageURLBase( const String & uri_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & id_, const String & format_name_, const std::optional & format_settings_, @@ -106,7 +114,7 @@ protected: const Names & column_names, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info, - ContextPtr context, + const ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; @@ -114,7 +122,7 @@ protected: const Names & column_names, const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, - ContextPtr context, + const ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; @@ -127,6 +135,14 @@ protected: bool supportsTrivialCountOptimization() const override { return true; } private: + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context); + virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; }; @@ -160,7 +176,7 @@ public: const String & format, const std::optional & format_settings, String name_, - ContextPtr context, + const ContextPtr & context, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -231,7 +247,7 @@ public: const String & format, const std::optional & format_settings, const Block & sample_block, - ContextPtr context, + const ContextPtr & context, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, const HTTPHeaderEntries & headers = {}, @@ -263,7 +279,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & 
compression_method_, const HTTPHeaderEntries & headers_ = {}, const String & method_ = "", @@ -292,12 +308,12 @@ public: std::string addresses_expr; }; - static Configuration getConfiguration(ASTs & args, ContextPtr context); + static Configuration getConfiguration(ASTs & args, const ContextPtr & context); /// Does evaluateConstantExpressionOrIdentifierAsLiteral() on all arguments. /// If `headers(...)` argument is present, parses it and moves it to the end of the array. /// Returns number of arguments excluding `headers(...)`. - static size_t evalArgsAndCollectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context); + static size_t evalArgsAndCollectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, const ContextPtr & context); static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); }; @@ -314,7 +330,7 @@ public: const std::optional & format_settings_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_); void read( diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index a0b5fcd6f28..d71dfea7693 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -35,36 +35,43 @@ namespace ErrorCodes } StorageURLCluster::StorageURLCluster( - ContextPtr context_, + const ContextPtr & context, const String & cluster_name_, const String & uri_, const String & format_, - const String & compression_method_, + const String & compression_method, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const StorageURL::Configuration & configuration_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , uri(uri_) + const StorageURL::Configuration & configuration_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageURLCluster (" + table_id_.table_name + ")")) + , uri(uri_), format_name(format_) { - context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); - context_->getHTTPHeaderFilter().checkHeaders(configuration_.headers); + context->getRemoteHostFilter().checkURL(Poco::URI(uri)); + context->getHTTPHeaderFilter().checkHeaders(configuration_.headers); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = StorageURL::getTableStructureFromData(format_, - uri, - chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method_), - configuration_.headers, - std::nullopt, - context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = StorageURL::getTableStructureAndFormatFromData( + uri, chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method), configuration_.headers, std::nullopt, context); + else + columns = StorageURL::getTableStructureFromData( + format_, uri, chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method), configuration_.headers, std::nullopt, context); + storage_metadata.setColumns(columns); } else + { + if (format_name == "auto") + format_name = StorageURL::getTableStructureAndFormatFromData( + uri, chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method), configuration_.headers, std::nullopt, context).second; + storage_metadata.setColumns(columns_); + } 
storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); @@ -72,13 +79,14 @@ StorageURLCluster::StorageURLCluster( virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } -void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function urlCluster, got '{}'", queryToString(query)); - TableFunctionURLCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionURLCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); } RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 07978040029..f57d262f434 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -19,16 +19,15 @@ class StorageURLCluster : public IStorageCluster { public: StorageURLCluster( - ContextPtr context_, + const ContextPtr & context, const String & cluster_name_, const String & uri_, const String & format_, - const String & compression_method_, + const String & compression_method, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const StorageURL::Configuration & configuration_, - bool structure_argument_was_provided_); + const StorageURL::Configuration & configuration_); std::string getName() const override { return "URLCluster"; } @@ -41,11 +40,10 @@ public: bool supportsTrivialCountOptimization() const override { return true; } private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; String uri; String format_name; - String compression_method; NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index a274b1ba4db..c01d0310952 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -59,7 +59,7 @@ std::vector> StorageXDBC::getReadURIParams( const Names & /* column_names */, const StorageSnapshotPtr & /*storage_snapshot*/, const SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + const ContextPtr & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t max_block_size) const { @@ -70,7 +70,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( const Names & column_names, const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, - ContextPtr local_context, + const ContextPtr & local_context, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index fe678785dc2..9a0a9b5afa1 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -55,7 +55,7 @@ private: 
const Names & column_names, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info, - ContextPtr context, + const ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const override; @@ -63,7 +63,7 @@ private: const Names & column_names, const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, - ContextPtr context, + const ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const override; diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index 7e81d6d21b7..9f56d781bc9 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -29,14 +28,14 @@ public: String getName() const override = 0; String getSignature() const override = 0; - static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context) + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context) { if (args.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty list of arguments for {}Cluster table function", Base::name); ASTPtr cluster_name_arg = args.front(); args.erase(args.begin()); - Base::addColumnsStructureToArguments(args, desired_structure, context); + Base::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context); args.insert(args.begin(), cluster_name_arg); } diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index b88af855309..b697f3df925 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -27,14 +27,14 @@ void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, const Conte filename = checkAndGetLiteralArgument(arg, "source"); } -String ITableFunctionFileLike::getFormatFromFirstArgument() +std::optional ITableFunctionFileLike::tryGetFormatFromFirstArgument() { - return FormatFactory::instance().getFormatFromFileName(filename, true); + return FormatFactory::instance().tryGetFormatFromFileName(filename); } bool ITableFunctionFileLike::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); + return format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); } void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -63,7 +63,10 @@ void ITableFunctionFileLike::parseArgumentsImpl(ASTs & args, const ContextPtr & format = checkAndGetLiteralArgument(args[1], "format"); if (format == "auto") - format = getFormatFromFirstArgument(); + { + if (auto format_from_first_argument = tryGetFormatFromFirstArgument()) + format = *format_from_first_argument; + } if (args.size() > 2) { @@ -79,34 +82,37 @@ void ITableFunctionFileLike::parseArgumentsImpl(ASTs & args, const ContextPtr & compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); } -void ITableFunctionFileLike::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &) +void ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) { if (args.empty() || args.size() 
> getMaxNumberOfArguments()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size()); + auto format_literal = std::make_shared(format); auto structure_literal = std::make_shared(structure); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// f(filename) if (args.size() == 1) { - /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); args.push_back(structure_literal); } /// f(filename, format) else if (args.size() == 2) { + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args.back() = format_literal; args.push_back(structure_literal); } - /// f(filename, format, 'auto') - else if (args.size() == 3) + /// f(filename, format, structure) or f(filename, format, structure, compression) + else if (args.size() >= 3) { - args.back() = structure_literal; - } - /// f(filename, format, 'auto', compression) - else if (args.size() == 4) - { - args[args.size() - 2] = structure_literal; + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; } } diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 5fe86587797..b378f2f3a6c 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -31,7 +31,7 @@ public: static size_t getMaxNumberOfArguments() { return 4; } - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr &); protected: @@ -39,7 +39,7 @@ protected: virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); virtual void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context); - virtual String getFormatFromFirstArgument(); + virtual std::optional tryGetFormatFromFirstArgument(); String filename; String path_to_archive; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index d394c836369..b9e0af53b7b 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -58,7 +58,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path); } else { @@ -155,7 +155,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); } } @@ -174,15 +174,24 @@ void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, parseArgumentsImpl(args, context); } -void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const 
ContextPtr & context) +void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) { - if (tryGetNamedCollectionWithOverrides(args, context)) + if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); + /// In case of named collection, just add key-value pairs "format='...', structure='...'" + /// at the end of arguments to override existed format and structure with "auto" values. + if (collection->getOrDefault("format", "auto") == "auto") + { + ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; + auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); + args.push_back(format_equal_func); + } + if (collection->getOrDefault("structure", "auto") == "auto") + { + ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); + args.push_back(structure_equal_func); + } } else { @@ -191,65 +200,126 @@ void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, "Storage Azure requires 3 to 7 arguments: " "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + auto format_literal = std::make_shared(format); auto structure_literal = std::make_shared(structure); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + auto is_format_arg = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; - + /// (connection_string, container_name, blobpath) if (args.size() == 3) { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); + /// Add compression = "auto" before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (connection_string, container_name, blobpath, structure) or + /// (connection_string, container_name, blobpath, format) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 4) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + /// (..., format) -> (..., format, compression, structure) if (is_format_arg(fourth_arg)) { + if (fourth_arg == "auto") + args[3] = format_literal; /// Add compression=auto before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (..., structure) -> (..., format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[3] = format_literal; + /// Add compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (connection_string, container_name, blobpath, format, compression) or + /// (storage_account_url, container_name, blobpath, account_name, account_key) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 5) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., format, compression) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args[3] = format_literal; + args.push_back(structure_literal); } - args.push_back(structure_literal); - } - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) + else { + args.push_back(format_literal); /// Add compression=auto before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + } + /// (connection_string, container_name, blobpath, format, compression, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, format) + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); + + /// (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[5], "structure") == "auto") + args[5] = structure_literal; + } + /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) + else if (is_format_arg(sixth_arg)) + { + if (sixth_arg == "auto") + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[5] = format_literal; + /// Add compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + if (sixth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) else if (args.size() == 7) { + /// (..., format, compression) -> (..., format, compression, structure) + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; args.push_back(structure_literal); } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) else if (args.size() == 8) { - args.back() = structure_literal; + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + if (checkAndGetLiteralArgument(args[7], "structure") == "auto") + args[7] = structure_literal; } } } @@ -263,7 +333,9 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex auto settings = StorageAzureBlob::createSettings(context); auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings)); - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); + if (configuration.format == "auto") + return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); } return parseColumnsListFromString(configuration.structure, context); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h index 1a221f60c55..9622881b417 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.h @@ -55,7 +55,7 @@ public: virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); protected: diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index eee585967c2..a2221cf35b6 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -21,9 +21,8 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( { StoragePtr storage; ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - if (structure_argument_was_provided) + if (configuration.structure != "auto") { columns = parseColumnsListFromString(configuration.structure, context); } @@ -59,8 +58,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, - context, - structure_argument_was_provided); + context); } storage->startup(); diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 8a9dde374ec..b481076e9b6 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -54,12 +54,12 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr throw Exception(ErrorCodes::BAD_ARGUMENTS, "The first argument of table 
function '{}' mush be path or file descriptor", getName()); } -String TableFunctionFile::getFormatFromFirstArgument() +std::optional TableFunctionFile::tryGetFormatFromFirstArgument() { if (fd >= 0) - return FormatFactory::instance().getFormatFromFileDescriptor(fd); + return FormatFactory::instance().tryGetFormatFromFileDescriptor(fd); else - return FormatFactory::instance().getFormatFromFileName(filename, true); + return FormatFactory::instance().tryGetFormatFromFileName(filename); } StoragePtr TableFunctionFile::getStorage(const String & source, @@ -104,10 +104,11 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context archive_info = StorageFile::getArchiveInfo(path_to_archive, filename, context->getUserFilesPath(), context, total_bytes_to_read); + if (format == "auto") + return StorageFile::getTableStructureAndFormatFromFile(paths, compression_method, std::nullopt, context, archive_info).first; return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, archive_info); } - return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index 6eaab29db8a..1347284753e 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -27,7 +27,7 @@ public: protected: int fd = -1; void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context) override; - String getFormatFromFirstArgument() override; + std::optional tryGetFormatFromFirstArgument() override; private: StoragePtr getStorage( diff --git a/src/TableFunctions/TableFunctionFileCluster.cpp b/src/TableFunctions/TableFunctionFileCluster.cpp index 843909e2a58..3e53349b022 100644 --- a/src/TableFunctions/TableFunctionFileCluster.cpp +++ b/src/TableFunctions/TableFunctionFileCluster.cpp @@ -43,8 +43,7 @@ StoragePtr TableFunctionFileCluster::getStorage( compression_method, StorageID(getDatabaseName(), table_name), columns, - ConstraintsDescription{}, - structure != "auto"); + ConstraintsDescription{}); } return storage; diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index 4b6d0f70c0a..ad2a142a140 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -33,7 +33,9 @@ namespace ErrorCodes namespace { -/* format(format_name, data) - ... +/* format(format_name, structure, data) - parses data according to the specified format and structure. + * format(format_name, data) - infers the schema from the data and parses it according to the specified format. + * format(data) - detects the format, infers the schema and parses data according to inferred format and structure. 
*/ class TableFunctionFormat : public ITableFunction { @@ -49,11 +51,11 @@ private: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - Block parseData(ColumnsDescription columns, ContextPtr context) const; + Block parseData(const ColumnsDescription & columns, const String & format_name, const ContextPtr & context) const; - String format; - String data; + String format = "auto"; String structure = "auto"; + String data; }; void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -65,14 +67,15 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args = args_func.at(0)->children; - if (args.size() != 2 && args.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 2 or 3 arguments: format, [structure], data", getName()); + if (args.empty() || args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires from 1 to 3 arguments: [format, [structure]], data", getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - format = checkAndGetLiteralArgument(args[0], "format"); data = checkAndGetLiteralArgument(args.back(), "data"); + if (args.size() > 1) + format = checkAndGetLiteralArgument(args[0], "format"); if (args.size() == 3) structure = checkAndGetLiteralArgument(args[1], "structure"); } @@ -82,19 +85,21 @@ ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr conte if (structure == "auto") { SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, false, context); + if (format == "auto") + return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context).first; + return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); } return parseColumnsListFromString(structure, context); } -Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr context) const +Block TableFunctionFormat::parseData(const ColumnsDescription & columns, const String & format_name, const ContextPtr & context) const { Block block; for (const auto & name_and_type : columns.getAllPhysical()) block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); auto read_buf = std::make_unique(data); - auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size); + auto input_format = context->getInputFormat(format_name, *read_buf, block, context->getSettingsRef().max_block_size); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); if (columns.hasDefaults()) @@ -120,10 +125,24 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont return concatenateBlocks(blocks); } -StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const +StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { - auto columns = getActualTableStructure(context, is_insert_query); - Block res_block = parseData(columns, context); + ColumnsDescription columns; + 
String format_name = format; + if (structure == "auto") + { + SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); + if (format_name == "auto") + std::tie(columns, format_name) = detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context); + else + columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); + } + else + { + columns = parseColumnsListFromString(structure, context); + } + + Block res_block = parseData(columns, format_name, context); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); return res; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index 8d48a7ba30e..2dac4398144 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -33,6 +33,8 @@ ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context if (structure == "auto") { context->checkAccess(getSourceAccessType()); + if (format == "auto") + return StorageHDFS::getTableStructureAndFormatFromData(filename, compression_method, context).first; return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); } diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 6fb7ed0fce5..57ce6d2b9ff 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -45,8 +45,7 @@ StoragePtr TableFunctionHDFSCluster::getStorage( format, columns, ConstraintsDescription{}, - compression_method, - structure != "auto"); + compression_method); } return storage; } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index a9c5a5c99f0..3fedd38277c 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -61,12 +61,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (configuration.format == "auto") { String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); - configuration.format = FormatFactory::instance().getFormatFromFileName(file_path, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(file_path).value_or("auto"); } } else { - size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); if (count == 0 || count > 7) @@ -216,7 +215,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.auth_settings.no_sign_request = no_sign_request; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(url).getPath()).value_or("auto"); } configuration.keys = {configuration.url.key}; @@ -238,15 +237,24 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con parseArgumentsImpl(args, context); } -void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) { - if (tryGetNamedCollectionWithOverrides(args, context)) + if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { - /// In 
case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); + /// In case of named collection, just add key-value pairs "format='...', structure='...'" + /// at the end of arguments to override existed format and structure with "auto" values. + if (collection->getOrDefault("format", "auto") == "auto") + { + ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; + auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); + args.push_back(format_equal_func); + } + if (collection->getOrDefault("structure", "auto") == "auto") + { + ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); + args.push_back(structure_equal_func); + } } else { @@ -256,23 +264,25 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & if (count == 0 || count > getMaxNumberOfArguments()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); + auto format_literal = std::make_shared(format); auto structure_literal = std::make_shared(structure); - /// s3(s3_url) + /// s3(s3_url) -> s3(s3_url, format, structure) if (count == 1) { - /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); args.push_back(structure_literal); } - /// s3(s3_url, format) or s3(s3_url, NOSIGN) + /// s3(s3_url, format) -> s3(s3_url, format, structure) or + /// s3(s3_url, NOSIGN) -> s3(s3_url, NOSIGN, format, structure) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. else if (count == 2) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// If there is NOSIGN, add format=auto before structure. if (boost::iequals(second_arg, "NOSIGN")) - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); + else if (second_arg == "auto") + args.back() = format_literal; args.push_back(structure_literal); } /// s3(source, format, structure) or @@ -282,18 +292,25 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// s3(source, NOSIGN, format) -> s3(source, NOSIGN, format, structure) if (boost::iequals(second_arg, "NOSIGN")) { + if (checkAndGetLiteralArgument(args[2], "format") == "auto") + args.back() = format_literal; args.push_back(structure_literal); } + /// s3(source, format, structure) else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) { - args[count - 1] = structure_literal; + if (second_arg == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; } + /// s3(source, access_key_id, access_key_id) -> s3(source, access_key_id, access_key_id, format, structure) else { - /// Add format=auto before structure argument. 
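
Editor's note: as a rough reading of the argument-rewriting branches above (a sketch with hypothetical values, not output produced by the patch), a call that omits format and structure has the resolved literals appended before the query is sent to the workers:

```sql
-- What the user writes on the initiator (format and structure omitted):
SELECT * FROM s3Cluster('my_cluster', 'https://bucket.example.com/data/data.bin', NOSIGN);

-- Roughly what the table function arguments look like after
-- updateStructureAndFormatArgumentsIfNeeded, assuming detection picked CSV and
-- schema inference produced two columns (both values are hypothetical):
SELECT * FROM s3Cluster('my_cluster', 'https://bucket.example.com/data/data.bin', NOSIGN, 'CSV', 'c1 String, c2 UInt64');
```
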
- args.push_back(std::make_shared("auto")); + args.push_back(format_literal); args.push_back(structure_literal); } } @@ -304,16 +321,27 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// s3(source, NOSIGN, format, structure) if (boost::iequals(second_arg, "NOSIGN")) { - args[count - 1] = structure_literal; + if (checkAndGetLiteralArgument(args[2], "format") == "auto") + args[2] = format_literal; + if (checkAndGetLiteralArgument(args[3], "structure") == "auto") + args[3] = structure_literal; } + /// s3(source, format, structure, compression_method) else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) { - args[count - 2] = structure_literal; + if (second_arg == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; } + /// s3(source, access_key_id, access_key_id, format) -> s3(source, access_key_id, access_key_id, format, structure) else { + if (checkAndGetLiteralArgument(args[3], "format") == "auto") + args[3] = format_literal; args.push_back(structure_literal); } } @@ -323,19 +351,30 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & else if (count == 5) { auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// s3(source, NOSIGN, format, structure, compression_method) if (boost::iequals(sedond_arg, "NOSIGN")) { - args[count - 2] = structure_literal; + if (checkAndGetLiteralArgument(args[2], "format") == "auto") + args[2] = format_literal; + if (checkAndGetLiteralArgument(args[3], "structure") == "auto") + args[3] = structure_literal; } + /// s3(source, access_key_id, access_key_id, format, structure) else { - args[count - 1] = structure_literal; + if (checkAndGetLiteralArgument(args[3], "format") == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[4], "structure") == "auto") + args[4] = structure_literal; } } /// s3(source, access_key_id, secret_access_key, format, structure, compression) else if (count == 6) { - args[count - 2] = structure_literal; + if (checkAndGetLiteralArgument(args[3], "format") == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[4], "structure") == "auto") + args[4] = structure_literal; } } } @@ -346,6 +385,9 @@ ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, { context->checkAccess(getSourceAccessType()); configuration.update(context); + if (configuration.format == "auto") + return StorageS3::getTableStructureAndFormatFromData(configuration, std::nullopt, context).first; + return StorageS3::getTableStructureFromData(configuration, std::nullopt, context); } diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index fa73c1d313e..00ca36c6653 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -57,7 +57,7 @@ public: virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); protected: diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index ce96f7f580b..e727c4e4c89 100644 --- 
a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -21,9 +21,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl( { StoragePtr storage; ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - if (structure_argument_was_provided) + if (configuration.structure != "auto") { columns = parseColumnsListFromString(configuration.structure, context); } @@ -53,8 +52,7 @@ StoragePtr TableFunctionS3Cluster::executeImpl( StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, - context, - structure_argument_was_provided); + context); } storage->startup(); diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index aa535991d65..a78b2affa9a 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -55,7 +55,7 @@ void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & contex format = configuration.format; if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true); + format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(filename).getPath()).value_or("auto"); StorageURL::evalArgsAndCollectHeaders(args, configuration.headers, context); } @@ -78,15 +78,24 @@ void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & contex } } -void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context) +void TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context) { - if (tryGetNamedCollectionWithOverrides(args, context)) + if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(desired_structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); + /// In case of named collection, just add key-value pairs "format='...', structure='...'" + /// at the end of arguments to override existed format and structure with "auto" values. 
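
Editor's note: for the named-collection branch described in the comment above, the override amounts to appending `format` and `structure` key-value arguments when the collection leaves them as `auto`. A hedged example with a hypothetical collection name:

```sql
-- A hypothetical named collection `web_data` that defines only the URL;
-- after the rewrite, the query sent to the workers carries explicit overrides:
SELECT * FROM url(web_data, format = 'JSONEachRow', structure = 'x UInt64, s String');
```
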
+ if (collection->getOrDefault("format", "auto") == "auto") + { + ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format_)}; + auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); + args.push_back(format_equal_func); + } + if (collection->getOrDefault("structure", "auto") == "auto") + { + ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); + args.push_back(structure_equal_func); + } } else { @@ -101,7 +110,7 @@ void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String args.pop_back(); } - ITableFunctionFileLike::addColumnsStructureToArguments(args, desired_structure, context); + ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context); if (headers_ast) args.push_back(headers_ast); @@ -131,6 +140,14 @@ ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, if (structure == "auto") { context->checkAccess(getSourceAccessType()); + if (format == "auto") + return StorageURL::getTableStructureAndFormatFromData( + filename, + chooseCompressionMethod(Poco::URI(filename).getPath(), compression_method), + configuration.headers, + std::nullopt, + context).first; + return StorageURL::getTableStructureFromData(format, filename, chooseCompressionMethod(Poco::URI(filename).getPath(), compression_method), @@ -148,9 +165,9 @@ std::unordered_set TableFunctionURL::getVirtualsToCheckBeforeUsingStruct return {virtual_column_names.begin(), virtual_column_names.end()}; } -String TableFunctionURL::getFormatFromFirstArgument() +std::optional TableFunctionURL::tryGetFormatFromFirstArgument() { - return FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true); + return FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(filename).getPath()); } void registerTableFunctionURL(TableFunctionFactory & factory) diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index bf417f950c0..54e223283ba 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -34,7 +34,7 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context); std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; @@ -53,8 +53,7 @@ private: const char * getStorageTypeName() const override { return "URL"; } - String getFormatFromFirstArgument() override; - + std::optional tryGetFormatFromFirstArgument() override; }; } diff --git a/src/TableFunctions/TableFunctionURLCluster.cpp b/src/TableFunctions/TableFunctionURLCluster.cpp index a2949278155..5fd3c3342a5 100644 --- a/src/TableFunctions/TableFunctionURLCluster.cpp +++ b/src/TableFunctions/TableFunctionURLCluster.cpp @@ -40,8 +40,7 @@ StoragePtr TableFunctionURLCluster::getStorage( StorageID(getDatabaseName(), table_name), getActualTableStructure(context, /* is_insert_query */ true), ConstraintsDescription{}, - configuration, - structure != "auto"); + configuration); } return storage; } diff --git a/tests/integration/test_file_cluster/test.py 
b/tests/integration/test_file_cluster/test.py index d75cd6c7d23..5d12407e3f2 100644 --- a/tests/integration/test_file_cluster/test.py +++ b/tests/integration/test_file_cluster/test.py @@ -123,3 +123,91 @@ def test_no_such_files(started_cluster): distributed = node.query(get_query("*", True, "3,4")) assert TSV(local) == TSV(distributed) + + +def test_schema_inference(started_cluster): + node = started_cluster.instances["s0_0_0"] + + expected_result = node.query( + "select * from file('file*.csv', 'CSV', 's String, i UInt32') ORDER BY (i, s)" + ) + result = node.query( + "select * from fileCluster('my_cluster', 'file*.csv') ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file*.csv', auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file*.csv', CSV) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file*.csv', auto, auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file*.csv', CSV, auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file*.csv', auto, auto, auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file*.csv', CSV, auto, auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + +def test_format_detection(started_cluster): + for node_name in ("s0_0_0", "s0_0_1", "s0_1_0"): + for i in range(1, 3): + started_cluster.instances[node_name].query( + f""" + INSERT INTO TABLE FUNCTION file( + 'file_for_format_detection_{i}', 'CSV', 's String, i UInt32') VALUES ('file{i}',{i}) + """ + ) + + node = started_cluster.instances["s0_0_0"] + expected_result = node.query( + "select * from file('file_for_format_detection*', 'CSV', 's String, i UInt32') ORDER BY (i, s)" + ) + + result = node.query( + "select * from fileCluster('my_cluster', 'file_for_format_detection*') ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file_for_format_detection*', auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file_for_format_detection*', auto, auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file_for_format_detection*', auto, 's String, i UInt32') ORDER BY (i, s)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file_for_format_detection*', auto, auto, auto) ORDER BY (c1, c2)" + ) + assert result == expected_result + + result = node.query( + "select * from fileCluster('my_cluster', 'file_for_format_detection*', auto, 's String, i UInt32', auto) ORDER BY (i, s)" + ) + assert result == expected_result diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 673ca318c92..03919ee6a4d 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -35,7 +35,9 @@ def create_buckets_s3(cluster): # Make all files a bit different for number in range(100 + file_number): - data.append([str(number + file_number) * 10, number + file_number]) + data.append( + 
["str_" + str(number + file_number) * 10, number + file_number] + ) writer = csv.writer(f) writer.writerows(data) @@ -427,3 +429,33 @@ def test_cluster_with_named_collection(started_cluster): ) assert TSV(pure_s3) == TSV(s3_cluster) + + +def test_cluster_format_detection(started_cluster): + node = started_cluster.instances["s0_0_0"] + + expected_desc_result = node.query( + "desc s3('http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV')" + ) + + desc_result = node.query( + "desc s3('http://minio1:9001/root/data/generated/*', 'minio', 'minio123')" + ) + + assert expected_desc_result == desc_result + + expected_result = node.query( + "SELECT * FROM s3('http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV', 'a String, b UInt64') order by a, b" + ) + + result = node.query( + "SELECT * FROM s3Cluster(cluster_simple, 'http://minio1:9001/root/data/generated/*', 'minio', 'minio123') order by c1, c2" + ) + + assert result == expected_result + + result = node.query( + "SELECT * FROM s3Cluster(cluster_simple, 'http://minio1:9001/root/data/generated/*', 'minio', 'minio123', auto, 'a String, b UInt64') order by a, b" + ) + + assert result == expected_result diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 3cccd07c134..75ef50ec12a 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1250,3 +1250,73 @@ def test_size_virtual_column(cluster): result == "test_size_virtual_column1.tsv\t2\ntest_size_virtual_column2.tsv\t3\ntest_size_virtual_column3.tsv\t4\n" ) + + +def test_format_detection(cluster): + node = cluster.instances["node"] + storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0)", + ) + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(10)", + ) + + expected_desc_result = azure_query( + node, + f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'auto')", + ) + + desc_result = azure_query( + node, + f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}')", + ) + + assert expected_desc_result == desc_result + + expected_result = azure_query( + node, + f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String')", + ) + + result = azure_query( + node, + f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}')", + ) + + assert result == expected_result + + result = azure_query( + node, + f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', auto, auto, 'x 
UInt64, y String')", + ) + + assert result == expected_result + + result = azure_query( + node, + f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection{{0,1}}', '{account_name}', '{account_key}')", + ) + + assert result == expected_result + + node.query(f"system drop schema cache for hdfs") + + result = azure_query( + node, + f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection{{0,1}}', '{account_name}', '{account_key}')", + ) + + assert result == expected_result + + result = azure_query( + node, + f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection{{0,1}}', '{account_name}', '{account_key}')", + ) + + assert result == expected_result diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py index 2bd3f24d25f..6c5e2d20ca5 100644 --- a/tests/integration/test_storage_azure_blob_storage/test_cluster.py +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -262,3 +262,72 @@ def test_partition_parallel_reading_with_cluster(cluster): ) assert azure_cluster == "3\n" + + +def test_format_detection(cluster): + node = cluster.instances["node_0"] + storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10)", + ) + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10, 10)", + ) + + expected_desc_result = azure_query( + node, + f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'auto')", + ) + + desc_result = azure_query( + node, + f"desc azureBlobStorageCluster('simple_cluster', '{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}')", + ) + + assert expected_desc_result == desc_result + + expected_result = azure_query( + node, + f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') order by x", + ) + + result = azure_query( + node, + f"select * from azureBlobStorageCluster('simple_cluster', '{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}') order by x", + ) + + assert result == expected_result + + result = azure_query( + node, + f"select * from azureBlobStorageCluster('simple_cluster', '{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}', auto) order by x", + ) + + assert result == expected_result + + result = azure_query( + node, + f"select * from azureBlobStorageCluster('simple_cluster', '{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}', auto, auto) order by x", + ) + + assert result == expected_result + + result = azure_query( + node, + f"select * from 
azureBlobStorageCluster('simple_cluster', '{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}', 'x UInt32, y String') order by x", + ) + + assert result == expected_result + + result = azure_query( + node, + f"select * from azureBlobStorageCluster('simple_cluster', '{storage_account_url}', 'cont', 'test_format_detection*', '{account_name}', '{account_key}', auto, auto, 'x UInt32, y String') order by x", + ) + + assert result == expected_result diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 8ed1e4b6c0e..8dee15f4d94 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1047,6 +1047,74 @@ def test_union_schema_inference_mode(started_cluster): assert "Cannot extract table structure" in error +def test_format_detection(started_cluster): + node = started_cluster.instances["node1"] + + node.query( + "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection0', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(0)" + ) + + node.query( + "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection1', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(10)" + ) + + expected_desc_result = node.query( + "desc hdfs('hdfs://hdfs1:9000/test_format_detection1', JSONEachRow)" + ) + + desc_result = node.query("desc hdfs('hdfs://hdfs1:9000/test_format_detection1')") + + assert expected_desc_result == desc_result + + expected_result = node.query( + "select * from hdfs('hdfs://hdfs1:9000/test_format_detection1', JSONEachRow, 'x UInt64, y String') order by x, y" + ) + + result = node.query( + "select * from hdfs('hdfs://hdfs1:9000/test_format_detection1') order by x, y" + ) + + assert expected_result == result + + result = node.query( + "select * from hdfs('hdfs://hdfs1:9000/test_format_detection1', auto, 'x UInt64, y String') order by x, y" + ) + + assert expected_result == result + + result = node.query( + "select * from hdfs('hdfs://hdfs1:9000/test_format_detection{0,1}') order by x, y" + ) + + assert expected_result == result + + node.query("system drop schema cache for hdfs") + + result = node.query( + "select * from hdfs('hdfs://hdfs1:9000/test_format_detection{0,1}') order by x, y" + ) + + assert expected_result == result + + result = node.query( + "select * from hdfsCluster(test_cluster_two_shards, 'hdfs://hdfs1:9000/test_format_detection{0,1}') order by x, y" + ) + + assert expected_result == result + + result = node.query( + "select * from hdfsCluster(test_cluster_two_shards, 'hdfs://hdfs1:9000/test_format_detection{0,1}', auto, auto) order by x, y" + ) + + assert expected_result == result + + result = node.query( + "select * from hdfsCluster(test_cluster_two_shards, 'hdfs://hdfs1:9000/test_format_detection{0,1}', auto, 'x UInt64, y String') order by x, y" + ) + + assert expected_result == result + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 2549cb0d473..365ade7da65 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -2194,3 +2194,57 @@ def test_union_schema_inference_mode(started_cluster): f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3,4}}.jsonl') settings schema_inference_mode='union', 
describe_compact_output=1 format TSV" ) assert "Cannot extract table structure" in error + + +def test_s3_format_detection(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection0', 'JSONEachRow', 'x UInt64, y String') select number, 'str_' || toString(number) from numbers(0) settings s3_truncate_on_insert=1" + ) + + instance.query( + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', 'JSONEachRow', 'x UInt64, y String') select number, 'str_' || toString(number) from numbers(5) settings s3_truncate_on_insert=1" + ) + + expected_result = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', 'JSONEachRow', 'x UInt64, y String')" + ) + + expected_desc_result = instance.query( + f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', 'JSONEachRow')" + ) + + for engine in ["s3", "url"]: + desc_result = instance.query( + f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1')" + ) + + assert desc_result == expected_desc_result + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1')" + ) + + assert result == expected_result + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', auto, 'x UInt64, y String')" + ) + + assert result == expected_result + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection{{0,1}}', auto, 'x UInt64, y String')" + ) + + assert result == expected_result + + instance.query(f"system drop schema cache for {engine}") + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection{{0,1}}', auto, 'x UInt64, y String')" + ) + + assert result == expected_result diff --git a/tests/queries/0_stateless/02969_auto_format_detection.reference b/tests/queries/0_stateless/02969_auto_format_detection.reference new file mode 100644 index 00000000000..4b86be04996 --- /dev/null +++ b/tests/queries/0_stateless/02969_auto_format_detection.reference @@ -0,0 +1,123 @@ +Parquet +a Nullable(UInt64) +b Nullable(String) +c Array(Nullable(UInt64)) +d Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +ORC +a Nullable(Int64) +b Nullable(String) +c Array(Nullable(Int64)) +d Tuple(\n a Nullable(Int64),\n b Nullable(String)) +Arrow +a Nullable(UInt64) +b Nullable(String) +c Array(Nullable(UInt64)) +d Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +ArrowStream +a Nullable(UInt64) +b Nullable(String) +c Array(Nullable(UInt64)) +d Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +Avro +a Int64 +b String +c Array(Int64) +d Tuple(\n a Int64,\n b String) +Native +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +BSONEachRow +a Nullable(Int64) +b Nullable(String) +c Array(Nullable(Int64)) +d Tuple(\n a Nullable(Int64),\n b Nullable(String)) +JSONCompact +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +Values +c1 
Nullable(UInt64) +c2 Nullable(String) +c3 Array(Nullable(UInt64)) +c4 Tuple(Nullable(UInt64), Nullable(String)) +TSKV +a Nullable(String) +b Nullable(String) +c Array(Nullable(UInt64)) +d Nullable(String) +JSONObjectEachRow +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +JSONColumns +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +JSONCompactColumns +c1 Nullable(String) +c2 Nullable(String) +c3 Array(Nullable(String)) +c4 Tuple(\n a Nullable(String),\n b Nullable(String)) +JSONCompact +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +JSON +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +TSV +c1 Nullable(UInt64) +c2 Nullable(String) +c3 Array(Nullable(UInt64)) +c4 Tuple(Nullable(UInt64), Nullable(String)) +CSV +c1 Nullable(UInt64) +c2 Nullable(String) +c3 Array(Nullable(UInt64)) +c4 Nullable(UInt64) +c5 Nullable(String) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +1 +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) diff --git a/tests/queries/0_stateless/02969_auto_format_detection.sh b/tests/queries/0_stateless/02969_auto_format_detection.sh new file mode 100755 index 00000000000..5b9b4e09efa --- /dev/null +++ b/tests/queries/0_stateless/02969_auto_format_detection.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.data + +for format in Parquet ORC Arrow ArrowStream Avro Native BSONEachRow JSONCompact Values TSKV JSONObjectEachRow JSONColumns JSONCompactColumns JSONCompact JSON TSV CSV +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE')" +done + +rm $DATA_FILE + +$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format JSONEachRow" > $DATA_FILE.jsonl +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE*')" + + +$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format JSONEachRow" > $DATA_FILE + +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE', auto, 'a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)')" + +$CLICKHOUSE_LOCAL -nmq " +desc file('$DATA_FILE'); +desc file('$DATA_FILE'); +" + +$CLICKHOUSE_LOCAL -nmq " +desc file('$DATA_FILE', JSONEachRow); +desc file('$DATA_FILE'); +" + +touch $DATA_FILE.1 +$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format JSONEachRow" > $DATA_FILE.2 +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE.{1,2}')" +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE.{1,2}') settings schema_inference_mode='union'" 2>&1 | grep -c "CANNOT_DETECT_FORMAT" + +$CLICKHOUSE_LOCAL -nmq " +desc file('$DATA_FILE.2'); +desc file('$DATA_FILE.{1,2}'); +" + +rm $DATA_FILE* From f05174e441f8efaa732f9e717f46259a9a8e479b Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 22 Jan 2024 23:28:17 +0000 Subject: [PATCH 125/884] Fix style --- docs/en/interfaces/schema-inference.md | 2 +- src/Formats/ReadSchemaUtils.cpp | 2 +- src/Storages/IStorageCluster.h | 2 +- src/Storages/StorageAzureBlob.cpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index d255688da1f..3d3ee5c83d6 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -1988,7 +1988,7 @@ Note: - If you have a lot of files, reading schema from all of them can take a lot of time. -## Automatic format detection {#autimatic-format-detection} +## Automatic format detection {#automatic-format-detection} If data format is not specified and cannot be determined by the file extension, ClickHouse will try to detect the file format by its content. diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index b4fba7b9ce6..c882f15b4b0 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -379,7 +379,7 @@ try /// We choose the format with larger number of columns in inferred schema. 
size_t max_number_of_columns = 0; - for (const auto & [format_to_detect, schema] : format_to_schema ) + for (const auto & [format_to_detect, schema] : format_to_schema) { if (schema.size() > max_number_of_columns) { diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index 28ebda5125e..92d9a84b758 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -41,7 +41,7 @@ public: protected: virtual void updateBeforeRead(const ContextPtr &) {} - virtual void updateQueryToSendIfNeeded(ASTPtr & /*query*/, const StorageSnapshotPtr & /*storage_snapshot*/, const ContextPtr & /*context*/) {} + virtual void updateQueryToSendIfNeeded(ASTPtr & /*query*/, const StorageSnapshotPtr & /*storage_snapshot*/, const ContextPtr & /*context*/) {} private: Poco::Logger * log; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 888d360aff1..35072dc5cae 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1335,7 +1335,7 @@ namespace "in AzureBlobStorage. You can specify table structure manually", *format); throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files, because there are no files with provided path " "in AzureBlobStorage. You can specify table structure manually"); } @@ -1407,7 +1407,7 @@ namespace { format = format_name; } - + String getLastFileName() const override { return current_path_with_metadata.relative_path; } bool supportsLastReadBufferRecreation() const override { return true; } From 1bff525666b96e1a433d07e933e76a845e503dcb Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 23 Jan 2024 09:35:41 +0000 Subject: [PATCH 126/884] Fix tests and docs --- docs/en/interfaces/schema-inference.md | 2 +- src/Client/ClientBase.cpp | 6 +++--- tests/queries/0_stateless/02969_auto_format_detection.sh | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 3d3ee5c83d6..6a7b744dd43 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -55,7 +55,7 @@ DESCRIBE file('hobbies.jsonl') └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md), [azureBlobStorage](./engines/table-engines/integrations/azureBlobStorage.md) +## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md), [azureBlobStorage](../engines/table-engines/integrations/azureBlobStorage.md) If the list of columns is not specified in `CREATE TABLE` query, the structure of the table will be inferred automatically from the data. 
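
Editor's note: since the documentation hunk above describes automatic format detection in general terms, a brief hedged example may help (the file name is made up; it mirrors the new stateless and integration tests):

```sql
-- Write a file whose name carries no format hint:
INSERT INTO FUNCTION file('events_data', JSONEachRow)
SELECT number AS x, concat('str_', toString(number)) AS s FROM numbers(3);

-- Both the format and the structure are then detected from the file contents:
DESCRIBE file('events_data');
SELECT * FROM file('events_data');
```
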
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 01eff0d3e4c..6c39c3ebc95 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -632,9 +632,9 @@ try } else if (query_with_output->out_file) { - const auto & format_name = FormatFactory::instance().getFormatFromFileName(out_file); - if (!format_name.empty()) - current_format = format_name; + auto format_name = FormatFactory::instance().tryGetFormatFromFileName(out_file); + if (format_name) + current_format = *format_name; } } diff --git a/tests/queries/0_stateless/02969_auto_format_detection.sh b/tests/queries/0_stateless/02969_auto_format_detection.sh index 5b9b4e09efa..88d6575e499 100755 --- a/tests/queries/0_stateless/02969_auto_format_detection.sh +++ b/tests/queries/0_stateless/02969_auto_format_detection.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 48609d3c9f55b00cd787a5592c8f51a96ac9ad42 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 23 Jan 2024 09:36:01 +0000 Subject: [PATCH 127/884] Fix tests --- programs/local/LocalServer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index dd96532aadd..2caf3a559a9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -336,23 +336,23 @@ std::string LocalServer::getInitialCreateTableQuery() auto table_structure = config().getString("table-structure", "auto"); String table_file; - String format_from_file_name; + std::optional format_from_file_name; if (!config().has("table-file") || config().getString("table-file") == "-") { /// Use Unix tools stdin naming convention table_file = "stdin"; - format_from_file_name = FormatFactory::instance().getFormatFromFileDescriptor(STDIN_FILENO); + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDIN_FILENO); } else { /// Use regular file auto file_name = config().getString("table-file"); table_file = quoteString(file_name); - format_from_file_name = FormatFactory::instance().getFormatFromFileName(file_name); + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name); } auto data_format = backQuoteIfNeed( - config().getString("table-data-format", config().getString("format", format_from_file_name.empty() ? "TSV" : format_from_file_name))); + config().getString("table-data-format", config().getString("format", format_from_file_name ? 
*format_from_file_name : "TSV"))); if (table_structure == "auto") From eaca40c53efb4c08878165219242bc22a004371f Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 23 Jan 2024 10:28:57 +0000 Subject: [PATCH 128/884] Update tests --- src/Formats/ReadSchemaUtils.cpp | 8 ++++---- tests/queries/0_stateless/01030_storage_url_syntax.sql | 4 ++-- tests/queries/0_stateless/02424_pod_array_overflow.sql | 2 +- tests/queries/0_stateless/02426_pod_array_overflow_2.sql | 2 +- tests/queries/0_stateless/02426_pod_array_overflow_3.sql | 2 +- .../queries/0_stateless/02497_schema_inference_nulls.sql | 4 ++-- .../0_stateless/02502_bad_values_schema_inference.sql | 2 +- .../02783_max_bytes_to_read_in_schema_inference.sql | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index c882f15b4b0..08e05872c97 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -193,13 +193,13 @@ try if (format_name) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file:\n{}\nYou can specify the structure manually", + "The table structure cannot be extracted from a {} format file:\n{}\n.You can specify the structure manually", *format_name, exception_message); throw Exception( ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files:\n{}\nYou can specify the format manually", + "The data format cannot be detected by the contents of the files:\n{}\n.You can specify the format manually", exception_message); } @@ -274,7 +274,7 @@ try throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "The table structure cannot be extracted from a {} format file. " - "Error: {}. You can specify the structure manually", + "Error:\n{}.\nYou can specify the structure manually", *format_name, exception_message); } @@ -473,7 +473,7 @@ try throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "The table structure cannot be extracted from a {} format file. " - "Error: {}. 
You can specify the structure manually", + "Error:\n{}.\nYou can specify the structure manually", *format_name, exception_messages); } diff --git a/tests/queries/0_stateless/01030_storage_url_syntax.sql b/tests/queries/0_stateless/01030_storage_url_syntax.sql index 9b31558eece..eda108aca2f 100644 --- a/tests/queries/0_stateless/01030_storage_url_syntax.sql +++ b/tests/queries/0_stateless/01030_storage_url_syntax.sql @@ -1,7 +1,7 @@ drop table if exists test_table_url_syntax ; create table test_table_url_syntax (id UInt32) ENGINE = URL('') -; -- { serverError 36 } +; -- { serverError UNSUPPORTED_URI_SCHEME } create table test_table_url_syntax (id UInt32) ENGINE = URL('','','','') ; -- { serverError 42 } drop table if exists test_table_url_syntax @@ -11,7 +11,7 @@ drop table if exists test_table_url ; create table test_table_url(id UInt32) ENGINE = URL('http://localhost/endpoint') -; -- { serverError 36 } +; -- { serverError CANNOT_DETECT_FORMAT } create table test_table_url(id UInt32) ENGINE = URL('http://localhost/endpoint.json'); drop table test_table_url; diff --git a/tests/queries/0_stateless/02424_pod_array_overflow.sql b/tests/queries/0_stateless/02424_pod_array_overflow.sql index 4b85d5be029..50c46cf19f1 100644 --- a/tests/queries/0_stateless/02424_pod_array_overflow.sql +++ b/tests/queries/0_stateless/02424_pod_array_overflow.sql @@ -1 +1 @@ -SELECT * FROM format(Native, '\x02\x02\x02\x6b\x30\x1a\x4d\x61\x70\x28\x46\x69\x78\x65\x64\x53\x74\x72\x69\x6e\x67\x28\x31\x29\x2c\x20\x49\x6e\x74\x36\x34\x29\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f\x00\x7f\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x64\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xcf\x31\x3f\x56\x69\x11\x89\x25'); -- { serverError 128 } +SELECT * FROM format(Native, '\x02\x02\x02\x6b\x30\x1a\x4d\x61\x70\x28\x46\x69\x78\x65\x64\x53\x74\x72\x69\x6e\x67\x28\x31\x29\x2c\x20\x49\x6e\x74\x36\x34\x29\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f\x00\x7f\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x64\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xcf\x31\x3f\x56\x69\x11\x89\x25'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git a/tests/queries/0_stateless/02426_pod_array_overflow_2.sql b/tests/queries/0_stateless/02426_pod_array_overflow_2.sql index 52a00730227..6a0d97acee3 100644 --- a/tests/queries/0_stateless/02426_pod_array_overflow_2.sql +++ b/tests/queries/0_stateless/02426_pod_array_overflow_2.sql @@ -1 +1 @@ -SELECT * FROM format(Native, 'k0\x23Array(Tuple(FixedString(1), Int64))\0\0\0\0\0\0\0�����\0����������������\0�\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0d\0\0\0\0\0\0\0\0\0\0\0\0\0�1?Vi�%'); -- { serverError 128 } +SELECT * FROM format(Native, 'k0\x23Array(Tuple(FixedString(1), Int64))\0\0\0\0\0\0\0�����\0����������������\0�\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0d\0\0\0\0\0\0\0\0\0\0\0\0\0�1?Vi�%'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git a/tests/queries/0_stateless/02426_pod_array_overflow_3.sql b/tests/queries/0_stateless/02426_pod_array_overflow_3.sql index 857ba2ca28e..caabf7d1679 100644 --- a/tests/queries/0_stateless/02426_pod_array_overflow_3.sql +++ b/tests/queries/0_stateless/02426_pod_array_overflow_3.sql @@ -1 +1 @@ -SELECT * FROM format(Native, '\x01\x01\x01x\x0CArray(UInt8)\x01\x00\xBD\xEF\xBF\xBD\xEF\xBF\xBD\xEF'); -- { serverError 128 } +SELECT * FROM format(Native, 
'\x01\x01\x01x\x0CArray(UInt8)\x01\x00\xBD\xEF\xBF\xBD\xEF\xBF\xBD\xEF'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql index a25060e8182..b78b5709dbb 100644 --- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -4,7 +4,7 @@ set input_format_json_try_infer_named_tuples_from_objects=0; set input_format_json_read_objects_as_strings=0; set input_format_json_infer_incomplete_types_as_strings=0; set input_format_json_read_numbers_as_strings=0; -desc format(JSONEachRow, '{"x" : 1234}, {"x" : "String"}') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONEachRow, '{"x" : 1234}, {"x" : "String"}') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } desc format(JSONEachRow, '{"x" : [null, 1]}'); desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : []}'); desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [null]}'); @@ -26,7 +26,7 @@ desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [null]}'); select 'JSONCompactEachRow'; set schema_inference_make_columns_nullable=1; -desc format(JSONCompactEachRow, '[1234], ["String"]') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONCompactEachRow, '[1234], ["String"]') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } desc format(JSONCompactEachRow, '[[null, 1]]'); desc format(JSONCompactEachRow, '[[null, 1]], [[]]'); desc format(JSONCompactEachRow, '[[null, 1]], [[null]]'); diff --git a/tests/queries/0_stateless/02502_bad_values_schema_inference.sql b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql index 4c796842c0d..67ac09832de 100644 --- a/tests/queries/0_stateless/02502_bad_values_schema_inference.sql +++ b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql @@ -1,2 +1,2 @@ -desc format(Values, '(\'abc)'); -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } +desc format(Values, '(\'abc)'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git a/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql b/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql index b4165e8e80a..ef0381df1a6 100644 --- a/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql +++ b/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql @@ -1,5 +1,5 @@ set input_format_max_rows_to_read_for_schema_inference=2; set input_format_json_infer_incomplete_types_as_strings=0; -desc format('JSONEachRow', '{"a" : null}, {"a" : 42}') settings input_format_max_bytes_to_read_for_schema_inference=10; -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +desc format('JSONEachRow', '{"a" : null}, {"a" : 42}') settings input_format_max_bytes_to_read_for_schema_inference=10; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} desc format('JSONEachRow', '{"a" : null}, {"a" : 42}') settings input_format_max_bytes_to_read_for_schema_inference=20; From 5e4796ae161e0546845f2dca167671eadf9463c3 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 23 Jan 2024 12:46:07 +0000 Subject: [PATCH 129/884] Fix heap-use-after-free --- src/Formats/ReadSchemaUtils.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git 
a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 08e05872c97..4c734130622 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -349,31 +349,35 @@ try if (!format_name) { std::unordered_map format_to_schema; - for (const auto & format_to_detect : getSimilarFormatsSetForDetection()) + const auto & formats_set_to_detect = getSimilarFormatsSetForDetection(); + for (size_t i = 0; i != formats_set_to_detect.size(); ++i) { try { schema_reader = FormatFactory::instance().getSchemaReader( - format_to_detect, support_buf_recreation ? *iterator_data.buf : *peekable_buf, context, format_settings); + formats_set_to_detect[i], support_buf_recreation ? *iterator_data.buf : *peekable_buf, context, format_settings); schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); auto tmp_names_and_types = schema_reader->readSchema(); /// If schema was inferred successfully for this format, remember it and try next format. if (!tmp_names_and_types.empty()) - format_to_schema[format_to_detect] = tmp_names_and_types; + format_to_schema[formats_set_to_detect[i]] = tmp_names_and_types; } catch (...) // NOLINT(bugprone-empty-catch) { /// Try next format. } - if (support_buf_recreation) + if (i != formats_set_to_detect.size() - 1) { - read_buffer_iterator.setPreviousReadBuffer(std::move(iterator_data.buf)); - iterator_data.buf = read_buffer_iterator.recreateLastReadBuffer(); - } - else - { - peekable_buf->rollbackToCheckpoint(); + if (support_buf_recreation) + { + read_buffer_iterator.setPreviousReadBuffer(std::move(iterator_data.buf)); + iterator_data.buf = read_buffer_iterator.recreateLastReadBuffer(); + } + else + { + peekable_buf->rollbackToCheckpoint(); + } } } From 93fbe1d9c8dc1af75094e81fc25e20ee1241bab3 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 23 Jan 2024 16:17:16 +0000 Subject: [PATCH 130/884] Fixes --- src/Formats/FormatFactory.cpp | 2 +- src/Formats/ReadSchemaUtils.cpp | 16 ++++++++-------- .../0_stateless/02185_orc_corrupted_file.sh | 2 +- .../02245_parquet_skip_unknown_type.sh | 2 +- ..._json_wrong_root_type_in_schema_inference.sql | 4 ++-- .../0_stateless/02286_mysql_dump_input_format.sh | 2 +- .../0_stateless/02293_formats_json_columns.sh | 2 +- .../02327_capnproto_protobuf_empty_messages.sh | 8 ++++---- .../0_stateless/02416_json_object_inference.sql | 2 +- ...uplicate_column_names_in_schema_inference.sql | 8 ++++---- .../02458_use_structure_from_insertion_table.sql | 10 +++++----- tests/queries/0_stateless/02724_database_s3.sh | 2 +- tests/queries/0_stateless/02725_database_hdfs.sh | 2 +- .../02900_union_schema_inference_mode.sh | 4 ++-- 14 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index cacb5a510da..b6f8f041d8d 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -673,7 +673,7 @@ std::optional FormatFactory::tryGetFormatFromFileDescriptor(int fd) #elif defined(OS_DARWIN) char file_path[PATH_MAX] = {'\0'}; if (fcntl(fd, F_GETPATH, file_path) != -1) - return tryGetFormatFromFileName(file_path, false); + return tryGetFormatFromFileName(file_path); return std::nullopt; #else (void)fd; diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 4c734130622..5576da56dbf 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -127,7 +127,6 @@ try IReadBufferIterator::Data iterator_data; std::vector> schemas_for_union_mode; std::string exception_messages; - 
SchemaReaderPtr schema_reader; size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference : context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference; size_t max_bytes_to_read = format_settings ? format_settings->max_bytes_to_read_for_schema_inference @@ -227,6 +226,8 @@ try continue; } + SchemaReaderPtr schema_reader; + if (format_name) { try @@ -417,12 +418,11 @@ try if (!format_name) throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. You can specify the format manually"); - /// If we got all schemas from cache, schema_reader can be uninitialized. - /// But we still need some stateless methods of ISchemaReader, - /// let's initialize it with empty buffer. + /// We need some stateless methods of ISchemaReader, but during reading schema we + /// could not even create a schema reader (for example when we got schema from cache). + /// Let's create stateless schema reader from empty read buffer. EmptyReadBuffer empty; - if (!schema_reader) - schema_reader = FormatFactory::instance().getSchemaReader(*format_name, empty, context, format_settings); + SchemaReaderPtr stateless_schema_reader = FormatFactory::instance().getSchemaReader(*format_name, empty, context, format_settings); if (mode == SchemaInferenceMode::UNION) { @@ -449,7 +449,7 @@ try /// If types are not the same, try to transform them according /// to the format to find common type. auto new_type_copy = type; - schema_reader->transformTypesFromDifferentFilesIfNeeded(it->second, new_type_copy); + stateless_schema_reader->transformTypesFromDifferentFilesIfNeeded(it->second, new_type_copy); /// If types are not the same after transform, we cannot do anything, throw an exception. if (!it->second->equals(*new_type_copy)) @@ -495,7 +495,7 @@ try /// It will allow to execute simple data loading with query /// "INSERT INTO table SELECT * FROM ..." 
const auto & insertion_table = context->getInsertionTable(); - if (schema_reader && !schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty()) + if (!stateless_schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty()) { auto storage = DatabaseCatalog::instance().getTable(insertion_table, context); auto metadata = storage->getInMemoryMetadataPtr(); diff --git a/tests/queries/0_stateless/02185_orc_corrupted_file.sh b/tests/queries/0_stateless/02185_orc_corrupted_file.sh index 1987f094faa..12510ae3836 100755 --- a/tests/queries/0_stateless/02185_orc_corrupted_file.sh +++ b/tests/queries/0_stateless/02185_orc_corrupted_file.sh @@ -8,4 +8,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') cp $CUR_DIR/data_orc/corrupted.orc $USER_FILES_PATH/ -${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh index 954e2e83f27..8ff6e28b123 100755 --- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -12,6 +12,6 @@ DATA_FILE=$USER_FILES_PATH/$FILE_NAME cp $CUR_DIR/data_parquet_bad_column/metadata_0.parquet $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "Cannot extract table structure" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" $CLICKHOUSE_CLIENT -q "select count(*) from file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql index 5462d38f1a3..98bf29c32f5 100644 --- a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql +++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql @@ -1,7 +1,7 @@ -- Tags: no-fasttest insert into function file('02268_data.jsonl', 'TSV') select 1; -select * from file('02268_data.jsonl'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} +select * from file('02268_data.jsonl'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1; -select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} +select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh index a3711497ae8..2f6167c3ddf 100755 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "desc 
file(dump1.sql, MySQLDump) settings input_format_mys $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" -$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL' echo "dump2" diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index ce35c4bd878..4eae5a1abb4 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -88,4 +88,4 @@ echo ' } ' > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3, input_format_json_infer_incomplete_types_as_strings=0" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3, input_format_json_infer_incomplete_types_as_strings=0" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh index dfc0dedeaf1..650faf6985e 100755 --- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh +++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh @@ -15,11 +15,11 @@ mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02327_* $SCHEMADIR/$SERVER_SCHEMADIR/ -$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F 
-q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=1"; $CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference=1"; diff --git a/tests/queries/0_stateless/02416_json_object_inference.sql b/tests/queries/0_stateless/02416_json_object_inference.sql index 91137c0243c..3022ee026d0 100644 --- a/tests/queries/0_stateless/02416_json_object_inference.sql +++ b/tests/queries/0_stateless/02416_json_object_inference.sql @@ -2,5 +2,5 @@ set allow_experimental_object_type=1; desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); set allow_experimental_object_type=0, input_format_json_read_objects_as_strings=0, input_format_json_try_infer_named_tuples_from_objects=0, input_format_json_read_numbers_as_strings=0; -desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError 652} +desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql index 626a4d7034e..f67e5496a98 100644 --- a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql +++ b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql @@ -1,7 +1,7 @@ -- Tags: no-fasttest -desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError INCORRECT_DATA} -desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError INCORRECT_DATA} -desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError INCORRECT_DATA} -desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError INCORRECT_DATA} +desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql index ac549a7faf1..71a2381d7b6 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql @@ -10,14 +10,14 @@ set input_format_json_infer_incomplete_types_as_strings=0; insert into test select * from file(02458_data.jsonl); insert into test select x, 1 from file(02458_data.jsonl); insert into test select 
x, y from file(02458_data.jsonl); -insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x, z from file(02458_data.jsonl); insert into test select * from file(02458_data.jsoncompacteachrow); -insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select * from input() format CSV 1,2 insert into test select x, y from input() format CSV 1,2 -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index 13b627c0342..80b47282146 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,7 +46,7 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "S3_ERROR" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index b4e081f6de0..71ccee6f5f4 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -58,7 +58,7 @@ SELECT * FROM \"abacaba/file.tsv\" """ 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "HDFS_ERROR" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e 
"UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh index dc0dd8ae1f4..a0fdb5276e0 100755 --- a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh +++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh @@ -39,13 +39,13 @@ desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data{1,2,3}.jsonl'); " echo 'Error' > $CLICKHOUSE_TEST_UNIQUE_NAME/data4.jsonl -$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "Cannot extract table structure" +$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "CANNOT_EXTRACT_TABLE_STRUCTURE" $CLICKHOUSE_LOCAL -nm -q " set schema_inference_mode = 'union'; desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{2,3}.jsonl'); desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl'); -" 2>&1 | grep -c -F "Cannot extract table structure" +" 2>&1 | grep -c -F "CANNOT_EXTRACT_TABLE_STRUCTURE" echo 42 > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.csv echo 42, 43 > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.csv From 849858017237d9752f3efb801bcc2267288cb8c8 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 24 Jan 2024 10:01:06 +0100 Subject: [PATCH 131/884] Fixing build --- src/Backups/BackupIO_AzureBlobStorage.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index fca324869ae..34be110cd42 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -143,7 +144,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::AzureBlobStorage,configuration_.container, false, false} + , data_source_description{DataSourceType::AzureBlobStorage, configuration_.container, false, false} , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); From 788eb487075fe770097759edfd46544134e11116 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 24 Jan 2024 11:51:02 +0100 Subject: [PATCH 132/884] Fix build after merging master --- src/Backups/BackupIO_AzureBlobStorage.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 34be110cd42..2c2396e9c0a 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -34,7 +34,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::AzureBlobStorage, configuration_.container, false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { auto client_ptr = 
StorageAzureBlob::createClient(configuration, /* is_read_only */ false); @@ -99,7 +99,8 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, LOG_INFO(&Poco::Logger::get("BackupReaderAzureBlobStorage"), "Enter copyFileToDisk"); auto destination_data_source_description = destination_disk->getDataSourceDescription(); - if ((destination_data_source_description.type == DataSourceType::AzureBlobStorage) + if ((destination_data_source_description.type == DataSourceType::ObjectStorage) + && (destination_data_source_description.object_storage_type == ObjectStorageType::Azure) && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); @@ -144,7 +145,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::AzureBlobStorage, configuration_.container, false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); From 10aaf2cbe46c7a00f744ad2e6183c441db847587 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Wed, 24 Jan 2024 21:10:48 +0800 Subject: [PATCH 133/884] fix the default number of async_insert_max_data_size --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f085fe1abcd..4bef6f4a02d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1922,7 +1922,7 @@ Possible values: - Positive integer. - 0 — Asynchronous insertions are disabled. -Default value: `100000`. +Default value: `1000000`. ### async_insert_max_query_number {#async-insert-max-query-number} From e988f8a47142ab07228fbaee9acb4ba64f2644e1 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 24 Jan 2024 17:30:04 +0100 Subject: [PATCH 134/884] fix typo in formats.md --- docs/en/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index fd44fbf4462..a3f54c1c383 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -279,7 +279,7 @@ the values of `SearchPhrase`, `c` and `price` columns, which are escaped as `Quo `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` -In cases where it is challenging or not possible to deploy format output configuration for the template format to a directory on all nodes in a cluste, or if the format is trivial then `format_schema_rows_template` can be used to pass the template string directly in the query, rather than a path to the file which contains it. +In cases where it is challenging or not possible to deploy format output configuration for the template format to a directory on all nodes in a cluster, or if the format is trivial then `format_schema_rows_template` can be used to pass the template string directly in the query, rather than a path to the file which contains it. 
The `format_template_rows_between_delimiter` setting specifies the delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) From 6a9e7abf05760ef0eb7f531970f9eb110e7b4ea8 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 24 Jan 2024 17:57:46 +0100 Subject: [PATCH 135/884] Update 00937_format_schema_rows_template.sh --- .../queries/0_stateless/00937_format_schema_rows_template.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.sh b/tests/queries/0_stateless/00937_format_schema_rows_template.sh index 651e3618f83..3124cc3b52b 100755 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.sh +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.sh @@ -26,7 +26,8 @@ echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Like $CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ -format_template_rows_between_delimiter = ';\n'"; -- { serverError 474 } +format_template_rows_between_delimiter = ';\n'"; +-- { serverError 474 } $CLICKHOUSE_CLIENT --query="DROP TABLE template"; -rm "$CURDIR"/00937_template_output_format_row.tmp \ No newline at end of file +rm "$CURDIR"/00937_template_output_format_row.tmp From 11f1ea50d7182e3f9493e026b85cd91f6461aab4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 24 Jan 2024 17:55:31 +0000 Subject: [PATCH 136/884] Fix tests --- src/Formats/ReadSchemaUtils.cpp | 9 +-- src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/StorageAzureBlob.cpp | 60 +++++++++++++++---- src/Storages/StorageS3.cpp | 33 +++++----- .../TableFunctionAzureBlobStorage.cpp | 2 +- tests/integration/test_storage_hdfs/test.py | 4 +- tests/integration/test_storage_s3/test.py | 6 +- .../0_stateless/02725_database_hdfs.sh | 2 +- 8 files changed, 79 insertions(+), 39 deletions(-) diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 5576da56dbf..f97df25aba7 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -226,10 +226,12 @@ try continue; } - SchemaReaderPtr schema_reader; + std::unique_ptr peekable_buf; /// Can be used in format detection. Should be destroyed after schema reader. if (format_name) { + SchemaReaderPtr schema_reader; + try { schema_reader = FormatFactory::instance().getSchemaReader(*format_name, *iterator_data.buf, context, format_settings); @@ -296,7 +298,6 @@ try /// to high memory usage as it will save all the read data from the beginning of the file, /// especially it will be noticeable for formats like Parquet/ORC/Arrow that do seeks to the /// end of file. - std::unique_ptr peekable_buf; bool support_buf_recreation = read_buffer_iterator.supportsLastReadBufferRecreation(); if (!support_buf_recreation) { @@ -310,7 +311,7 @@ try { try { - schema_reader = FormatFactory::instance().getSchemaReader(format_to_detect, support_buf_recreation ? *iterator_data.buf : *peekable_buf, context, format_settings); + SchemaReaderPtr schema_reader = FormatFactory::instance().getSchemaReader(format_to_detect, support_buf_recreation ? 
*iterator_data.buf : *peekable_buf, context, format_settings); schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); names_and_types = schema_reader->readSchema(); if (names_and_types.empty()) @@ -355,7 +356,7 @@ try { try { - schema_reader = FormatFactory::instance().getSchemaReader( + SchemaReaderPtr schema_reader = FormatFactory::instance().getSchemaReader( formats_set_to_detect[i], support_buf_recreation ? *iterator_data.buf : *peekable_buf, context, format_settings); schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read); auto tmp_names_and_types = schema_reader->readSchema(); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index a846e9fd9ef..59eba6505f3 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -1147,7 +1147,7 @@ void registerStorageHDFS(StorageFactory & factory) } if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url); + format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); String compression_method; if (engine_args.size() == 3) diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 35072dc5cae..c55725ce940 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -144,7 +144,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); return configuration; } @@ -237,7 +237,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); return configuration; } @@ -1316,10 +1316,28 @@ namespace Data next() override { /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (first) { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & key : read_keys) + { + if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(key.relative_path)) + { + format = format_from_path; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns, format}; + } } current_path_with_metadata = file_iterator->next(); @@ -1345,15 +1363,33 @@ namespace first = false; - /// AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) + /// AzureBlobStorage file iterator could get new keys after new iteration. + if (read_keys.size() > prev_read_keys_size) { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + /// If format is unknown we can try to determine it by new file names. + if (!format) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it).relative_path)) + { + format = format_from_file_name; + break; + } + } + } + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { RelativePathsWithMetadata paths = {current_path_with_metadata}; if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) @@ -1520,7 +1556,7 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData( const std::optional & format_settings, const DB::ContextPtr & ctx) { - return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx).first; + return getTableStructureAndFormatFromDataImpl(configuration.format, object_storage, configuration, format_settings, ctx).first; } SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 8e5b6040a63..f9c7400edfb 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1702,30 +1702,33 @@ namespace return {nullptr, std::nullopt, format}; } - /// S3 file iterator could get new keys after new iteration, if format is unknown we can try to determine it by new file names. - if (!format && read_keys.size() > prev_read_keys_size) + /// S3 file iterator could get new keys after new iteration + if (read_keys.size() > prev_read_keys_size) { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + /// If format is unknown we can try to determine it by new file names. + if (!format) { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->key)) + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) { - format = format_from_file_name; - break; + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->key)) + { + format = format_from_file_name; + break; + } } } - } - /// S3 file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; } - prev_read_keys_size = read_keys.size(); - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) continue; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index b9e0af53b7b..8a537e154db 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -58,7 +58,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); } else { diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 8dee15f4d94..165dfb212b7 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -600,7 +600,7 @@ def test_schema_inference_with_globs(started_cluster): ) assert ( - "Cannot extract table structure from JSONCompactEachRow format file" in result + "CANNOT_EXTRACT_TABLE_STRUCTURE" in result ) @@ -1044,7 +1044,7 @@ def test_union_schema_inference_mode(started_cluster): error = node.query_and_get_error( "desc hdfs('hdfs://hdfs1:9000/test_union_schema_inference*.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) - assert "Cannot extract table structure" in error + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in error def test_format_detection(started_cluster): diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 365ade7da65..0b5e9462860 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1380,7 +1380,7 @@ def test_schema_inference_from_globs(started_cluster): ) assert ( - "Cannot extract table structure from JSONCompactEachRow format file" in result + "CANNOT_EXTRACT_TABLE_STRUCTURE" in result ) url_filename = "test{0,1,2,3}.jsoncompacteachrow" @@ -1390,7 +1390,7 @@ def test_schema_inference_from_globs(started_cluster): ) assert ( - "Cannot extract table structure from JSONCompactEachRow format file" in result + "CANNOT_EXTRACT_TABLE_STRUCTURE" in result ) @@ -2193,7 +2193,7 @@ def test_union_schema_inference_mode(started_cluster): error = instance.query_and_get_error( f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3,4}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) - assert "Cannot extract table structure" in 
error + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in error def test_s3_format_detection(started_cluster): diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 71ccee6f5f4..d62f928e947 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -58,7 +58,7 @@ SELECT * FROM \"abacaba/file.tsv\" """ 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "HDFS_ERROR" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: From f551081dd4c38ac014f554c7ee4efc4e18777f9a Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 24 Jan 2024 21:10:50 +0100 Subject: [PATCH 137/884] Addressed review comments --- src/Backups/BackupIO_AzureBlobStorage.cpp | 7 ++--- .../copyAzureBlobStorageFile.cpp | 27 ++++++++----------- .../copyAzureBlobStorageFile.h | 8 +++--- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 2c2396e9c0a..1b4c10ad0cb 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -104,7 +104,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); - auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional & object_attributes) -> size_t + auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. if (blob_path.size() != 2 || mode != WriteMode::Rewrite) @@ -123,7 +123,6 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, /* dest_path */ blob_path[0], settings, read_settings, - object_attributes, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupRDAzure"), /* for_disk_azure_blob_storage= */ true); @@ -180,7 +179,6 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu fs::path(configuration.blob_path) / path_in_backup, settings, read_settings, - {}, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); return; /// copied! 
} @@ -204,14 +202,13 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St /* dest_path */ destination, settings, read_settings, - {}, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure"), /* for_disk_azure_blob_storage= */ true); } void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, {}, + copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index bb8702e9b41..350d2d1d34e 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -47,10 +47,9 @@ namespace MultiVersion & client_, size_t offset_, size_t total_size_, - const String & dest_container_, + const String & dest_container_for_logging_, const String & dest_blob_, MultiVersion settings_, - const std::optional> & object_metadata_, ThreadPoolCallbackRunner schedule_, bool for_disk_azure_blob_storage_, const Poco::Logger * log_) @@ -58,10 +57,9 @@ namespace , client(client_) , offset (offset_) , total_size (total_size_) - , dest_container(dest_container_) + , dest_container_for_logging(dest_container_for_logging_) , dest_blob(dest_blob_) , settings(settings_) - , object_metadata(object_metadata_) , schedule(schedule_) , for_disk_azure_blob_storage(for_disk_azure_blob_storage_) , log(log_) @@ -76,10 +74,9 @@ namespace MultiVersion & client; size_t offset; size_t total_size; - const String & dest_container; + const String & dest_container_for_logging; const String & dest_blob; MultiVersion settings; - const std::optional> & object_metadata; ThreadPoolCallbackRunner schedule; bool for_disk_azure_blob_storage; const Poco::Logger * log; @@ -208,7 +205,7 @@ namespace void uploadPart(size_t part_offset, size_t part_size) { - LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, Size: {}", dest_container, dest_blob, part_size); + LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, Size: {}", dest_container_for_logging, dest_blob, part_size); if (!part_size) { @@ -287,7 +284,7 @@ namespace std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race task.block_id = block_id; - LOG_TRACE(log, "Writing part finished. Container: {}, Blob: {}, block_id: {}, Parts: {}", dest_container, dest_blob, block_id, bg_tasks.size()); + LOG_TRACE(log, "Writing part finished. 
Container: {}, Blob: {}, block_id: {}, Parts: {}", dest_container_for_logging, dest_blob, block_id, bg_tasks.size()); } String processUploadPartRequest(UploadPartTask & task) @@ -331,14 +328,13 @@ void copyDataToAzureBlobStorageFile( size_t offset, size_t size, MultiVersion & dest_client, - const String & dest_container, + const String & dest_container_for_logging, const String & dest_blob, MultiVersion settings, - const std::optional> & object_metadata, ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) { - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container, dest_blob, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; helper.performCopy(); } @@ -346,15 +342,14 @@ void copyDataToAzureBlobStorageFile( void copyAzureBlobStorageFile( MultiVersion & src_client, MultiVersion & dest_client, - const String & src_container, + const String & src_container_for_logging, const String & src_blob, size_t offset, size_t size, - const String & dest_container, + const String & dest_container_for_logging, const String & dest_blob, MultiVersion settings, const ReadSettings & read_settings, - const std::optional> & object_metadata, ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) { @@ -390,14 +385,14 @@ void copyAzureBlobStorageFile( } else { - LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container, src_blob); + LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); auto create_read_buffer = [&] { return std::make_unique(src_client.get(), src_blob, read_settings, settings.get()->max_single_read_retries, settings.get()->max_single_download_retries); }; - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container, dest_blob, settings, object_metadata, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")}; helper.performCopy(); } } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 491f7cd7176..15a31031f63 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -23,15 +23,14 @@ using CreateReadBuffer = std::function()>; void copyAzureBlobStorageFile( MultiVersion & src_client, MultiVersion & dest_client, - const String & src_container, + const String & src_container_for_logging, const String & src_blob, size_t src_offset, size_t src_size, - const String & dest_container, + const String & dest_container_for_logging, const String & dest_blob, MultiVersion settings, const ReadSettings & read_settings, - const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_azure_blob_storage = false); @@ -46,10 +45,9 @@ void copyDataToAzureBlobStorageFile( size_t offset, size_t size, MultiVersion & client, - const String & dest_container, + const String & dest_container_for_logging, const String & 
dest_blob, MultiVersion settings, - const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_azure_blob_storage = false); From ad196dd047e443158b18b8dfc52d1cf2d14d6593 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 25 Jan 2024 01:18:27 +0200 Subject: [PATCH 138/884] Update 00937_format_schema_rows_template.sh fix failing shellcheck --- tests/queries/0_stateless/00937_format_schema_rows_template.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.sh b/tests/queries/0_stateless/00937_format_schema_rows_template.sh index 3124cc3b52b..8b512513d94 100755 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.sh +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.sh @@ -26,8 +26,7 @@ echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Like $CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ -format_template_rows_between_delimiter = ';\n'"; --- { serverError 474 } +format_template_rows_between_delimiter = ';\n'; --{ serverError 474 }" $CLICKHOUSE_CLIENT --query="DROP TABLE template"; rm "$CURDIR"/00937_template_output_format_row.tmp From 3e3ae52acaedc57b9470f5b59c45307a7e048068 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 24 Jan 2024 14:35:35 +0000 Subject: [PATCH 139/884] Reduce even more memory --- programs/keeper-converter/KeeperConverter.cpp | 42 ++-- programs/keeper/CMakeLists.txt | 2 +- src/Coordination/KeeperSnapshotManager.cpp | 82 ++++--- src/Coordination/KeeperStorage.cpp | 223 +++++++++++------- src/Coordination/KeeperStorage.h | 153 ++++++++++-- src/Coordination/SnapshotableHashTable.h | 34 +-- src/Coordination/ZooKeeperDataReader.cpp | 2 +- 7 files changed, 353 insertions(+), 185 deletions(-) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 20448aafa2f..99f8bab3403 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -38,31 +38,31 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) return 0; } - try - { - auto keeper_context = std::make_shared(true); - keeper_context->setDigestEnabled(true); - keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); + //try + //{ + // auto keeper_context = std::make_shared(true); + // keeper_context->setDigestEnabled(true); + // keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); - DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); + // DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); - DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as(), logger); - storage.initializeSystemNodes(); + // DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as(), logger); + // storage.initializeSystemNodes(); - DB::deserializeLogsAndApplyToStorage(storage, 
options["zookeeper-logs-dir"].as(), logger); - DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); - DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); + // DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as(), logger); + // DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); + // DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); - DB::KeeperSnapshotManager manager(1, keeper_context); - auto snp = manager.serializeSnapshotToBuffer(snapshot); - auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); - std::cout << "Snapshot serialized to path:" << fs::path(file_info.disk->getPath()) / file_info.path << std::endl; - } - catch (...) - { - std::cerr << getCurrentExceptionMessage(true) << '\n'; - return getCurrentExceptionCode(); - } + // DB::KeeperSnapshotManager manager(1, keeper_context); + // auto snp = manager.serializeSnapshotToBuffer(snapshot); + // auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); + // std::cout << "Snapshot serialized to path:" << fs::path(file_info.disk->getPath()) / file_info.path << std::endl; + //} + //catch (...) + //{ + // std::cerr << getCurrentExceptionMessage(true) << '\n'; + // return getCurrentExceptionCode(); + //} return 0; } diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 143ded0ee85..fba9b3e4d86 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -44,7 +44,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index ee5935015e4..2f51e855763 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -79,20 +79,21 @@ namespace writeBinary(false, out); /// Serialize stat - writeBinary(node.stat.czxid, out); - writeBinary(node.stat.mzxid, out); - writeBinary(node.stat.ctime, out); - writeBinary(node.stat.mtime, out); - writeBinary(node.stat.version, out); - writeBinary(node.stat.cversion, out); - writeBinary(node.stat.aversion, out); - writeBinary(node.stat.ephemeralOwner, out); + writeBinary(node.czxid, out); + writeBinary(node.mzxid, out); + writeBinary(node.ctime(), out); + writeBinary(node.mtime(), out); + writeBinary(node.version, out); + writeBinary(node.cversion, out); + writeBinary(node.aversion, out); + const bool is_ephemeral = node.isEphemeral(); + writeBinary(is_ephemeral ? node.ephemeralOwner() : 0, out); if (version < SnapshotVersion::V6) - writeBinary(static_cast(node.getData().size()), out); - writeBinary(node.stat.numChildren, out); - writeBinary(node.stat.pzxid, out); + writeBinary(static_cast(node.data_size), out); + writeBinary(is_ephemeral ? 
0 : node.numChildren(), out); + writeBinary(node.pzxid, out); - writeBinary(node.seq_num, out); + writeBinary(node.seqNum(), out); if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5) writeBinary(node.sizeInBytes(), out); @@ -102,7 +103,7 @@ namespace { String new_data; readBinary(new_data, in); - node.setData(std::move(new_data)); + node.setData(new_data); if (version >= SnapshotVersion::V1) { @@ -138,22 +139,41 @@ namespace } /// Deserialize stat - readBinary(node.stat.czxid, in); - readBinary(node.stat.mzxid, in); - readBinary(node.stat.ctime, in); - readBinary(node.stat.mtime, in); - readBinary(node.stat.version, in); - readBinary(node.stat.cversion, in); - readBinary(node.stat.aversion, in); - readBinary(node.stat.ephemeralOwner, in); + readBinary(node.czxid, in); + readBinary(node.mzxid, in); + int64_t ctime; + readBinary(ctime, in); + node.setCtime(ctime); + int64_t mtime; + readBinary(mtime, in); + node.setMtime(mtime); + readBinary(node.version, in); + readBinary(node.cversion, in); + readBinary(node.aversion, in); + int64_t ephemeral_owner = 0; + readBinary(ephemeral_owner, in); + if (ephemeral_owner != 0) + { + node.is_ephemeral_and_mtime.is_ephemeral = true; + node.ephemeral_or_children_data.ephemeral_owner = ephemeral_owner; + } + if (version < SnapshotVersion::V6) { int32_t data_length = 0; readBinary(data_length, in); } - readBinary(node.stat.numChildren, in); - readBinary(node.stat.pzxid, in); - readBinary(node.seq_num, in); + int32_t num_children; + readBinary(num_children, in); + if (num_children) + node.ephemeral_or_children_data.children_info.num_children = num_children; + + readBinary(node.pzxid, in); + + int32_t seq_num; + readBinary(seq_num, in); + if (seq_num) + node.ephemeral_or_children_data.children_info.seq_num = seq_num; if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5) { @@ -238,7 +258,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr /// Benign race condition possible while taking snapshot: NuRaft decide to create snapshot at some log id /// and only after some time we lock storage and enable snapshot mode. So snapshot_container_size can be /// slightly bigger than required. 
- if (node.stat.mzxid > snapshot.zxid) + if (node.mzxid > snapshot.zxid) break; writeBinary(path, out); @@ -363,9 +383,9 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial if (recalculate_digest) storage.nodes_digest = 0; - const auto is_node_empty = [](const auto & node) + const auto is_node_empty = [](const auto & /*node*/) { - return node.getData().empty() && node.stat == KeeperStorage::Node::Stat{}; + return false; //node.getData().empty() && node == KeeperStorage::Node{}; }; for (size_t nodes_read = 0; nodes_read < snapshot_container_size; ++nodes_read) @@ -412,8 +432,8 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial } storage.container.insertOrReplace(path, node); - if (node.stat.ephemeralOwner != 0) - storage.ephemerals[node.stat.ephemeralOwner].insert(path); + if (node.isEphemeral()) + storage.ephemerals[node.ephemeralOwner()].insert(path); if (recalculate_digest) storage.nodes_digest += node.getDigest(path); @@ -433,12 +453,12 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial { if (itr.key != "/") { - if (itr.value.stat.numChildren != static_cast(itr.value.getChildren().size())) + if (itr.value.numChildren() != static_cast(itr.value.getChildren().size())) { #ifdef NDEBUG /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" - " is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key); + " is different from actual children size {} for node {}", itr.value.numChildren(), itr.value.getChildren().size(), itr.key); #else throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" " is different from actual children size {} for node {}", diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index c128d7c2f98..6cdbedc2dc6 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -166,56 +166,88 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( } // When this function is updated, update CURRENT_DIGEST_VERSION!! -uint64_t calculateDigest(std::string_view path, std::string_view data, const KeeperStorage::Node::Stat & stat) +uint64_t calculateDigest(std::string_view path, const KeeperStorage::Node & node) { SipHash hash; hash.update(path); - hash.update(data); + hash.update(node.data, node.data_size); - hash.update(stat.czxid); - hash.update(stat.czxid); - hash.update(stat.mzxid); - hash.update(stat.ctime); - hash.update(stat.mtime); - hash.update(stat.version); - hash.update(stat.cversion); - hash.update(stat.aversion); - hash.update(stat.ephemeralOwner); - hash.update(data.length()); - hash.update(stat.numChildren); - hash.update(stat.pzxid); + hash.update(node.czxid); + hash.update(node.czxid); + hash.update(node.mzxid); + hash.update(node.ctime()); + hash.update(node.mtime()); + hash.update(node.version); + hash.update(node.cversion); + hash.update(node.aversion); + bool is_ephemeral = node.isEphemeral(); + hash.update(is_ephemeral ? node.ephemeral_or_children_data.ephemeral_owner : 0); + hash.update(node.data_size); + hash.update(is_ephemeral ? 
0 : node.ephemeral_or_children_data.children_info.num_children); + hash.update(node.pzxid); return hash.get64(); } } +void KeeperStorage::Node::copyStats(const Coordination::Stat & stat) +{ + czxid = stat.czxid; + mzxid = stat.mzxid; + pzxid = stat.pzxid; + + setCtime(stat.ctime); + setMtime(stat.mtime); + + version = stat.version; + cversion = stat.cversion; + aversion = stat.aversion; + + if (stat.ephemeralOwner == 0) + { + is_ephemeral_and_mtime.is_ephemeral = false; + ephemeral_or_children_data.children_info.num_children = stat.numChildren; + } + else + { + is_ephemeral_and_mtime.is_ephemeral = true; + ephemeral_or_children_data.ephemeral_owner = stat.ephemeralOwner; + } +} + void KeeperStorage::Node::setResponseStat(Coordination::Stat & response_stat) const { - response_stat.czxid = stat.czxid; - response_stat.mzxid = stat.mzxid; - response_stat.ctime = stat.ctime; - response_stat.mtime = stat.mtime; - response_stat.version = stat.version; - response_stat.cversion = stat.cversion; - response_stat.aversion = stat.aversion; - response_stat.ephemeralOwner = stat.ephemeralOwner; - response_stat.dataLength = static_cast(data.size()); - response_stat.numChildren = stat.numChildren; - response_stat.pzxid = stat.pzxid; + response_stat.czxid = czxid; + response_stat.mzxid = mzxid; + response_stat.ctime = ctime(); + response_stat.mtime = mtime(); + response_stat.version = version; + response_stat.cversion = cversion; + response_stat.aversion = aversion; + bool is_ephemeral = isEphemeral(); + response_stat.ephemeralOwner = is_ephemeral ? ephemeral_or_children_data.ephemeral_owner : 0; + response_stat.dataLength = static_cast(data_size); + response_stat.numChildren = is_ephemeral ? 0 : numChildren(); + response_stat.pzxid = pzxid; } uint64_t KeeperStorage::Node::sizeInBytes() const { - return sizeof(Node) + children.size() * sizeof(StringRef) + data.size(); + return sizeof(Node) + children.size() * sizeof(StringRef) + data_size; } -void KeeperStorage::Node::setData(String new_data) +void KeeperStorage::Node::setData(const String & new_data) { - data = std::move(new_data); + data_size = static_cast(new_data.size()); + if (data_size != 0) + { + data = new char[new_data.size()]; + memcpy(data, new_data.data(), data_size); + } } void KeeperStorage::Node::addChild(StringRef child_path) @@ -230,15 +262,15 @@ void KeeperStorage::Node::removeChild(StringRef child_path) void KeeperStorage::Node::invalidateDigestCache() const { - has_cached_digest = false; + has_cached_digest_and_ctime.has_cached_digest = false; } UInt64 KeeperStorage::Node::getDigest(const std::string_view path) const { - if (!has_cached_digest) + if (!has_cached_digest_and_ctime.has_cached_digest) { - cached_digest = calculateDigest(path, data, stat); - has_cached_digest = true; + cached_digest = calculateDigest(path, *this); + has_cached_digest_and_ctime.has_cached_digest = true; } return cached_digest; @@ -246,9 +278,28 @@ UInt64 KeeperStorage::Node::getDigest(const std::string_view path) const void KeeperStorage::Node::shallowCopy(const KeeperStorage::Node & other) { - stat = other.stat; - seq_num = other.seq_num; - setData(other.getData()); + czxid = other.czxid; + mzxid = other.mzxid; + pzxid = other.pzxid; + acl_id = other.acl_id; /// 0 -- no ACL by default + + has_cached_digest_and_ctime = other.has_cached_digest_and_ctime; + + is_ephemeral_and_mtime = other.is_ephemeral_and_mtime; + + ephemeral_or_children_data = other.ephemeral_or_children_data; + + data_size = other.data_size; + if (data_size != 0) + { + data = new 
char[data_size]; + memcpy(data, other.data, data_size); + } + + version = other.version; + cversion = other.cversion; + aversion = other.aversion; + cached_digest = other.cached_digest; } @@ -284,9 +335,9 @@ void KeeperStorage::initializeSystemNodes() removeDigest(current_root_it->value, "/"); auto updated_root_it = container.updateValue( "/", - [](auto & node) + [](KeeperStorage::Node & node) { - ++node.stat.numChildren; + node.increaseNumChildren(); node.addChild(getBaseNodeName(keeper_system_path)); } ); @@ -359,7 +410,7 @@ void KeeperStorage::UncommittedState::applyDelta(const Delta & delta) { assert(!node); node = std::make_shared(); - node->stat = operation.stat; + node->copyStats(operation.stat); node->setData(operation.data); acls = operation.acls; last_applied_zxid = delta.zxid; @@ -673,7 +724,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) if (node_it == container.end()) onStorageInconsistency(); - if (operation.version != -1 && operation.version != node_it->value.stat.version) + if (operation.version != -1 && operation.version != node_it->value.version) onStorageInconsistency(); removeDigest(node_it->value, path); @@ -695,7 +746,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) if (node_it == container.end()) onStorageInconsistency(); - if (operation.version != -1 && operation.version != node_it->value.stat.aversion) + if (operation.version != -1 && operation.version != node_it->value.aversion) onStorageInconsistency(); acl_map.removeUsage(node_it->value.acl_id); @@ -740,7 +791,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) bool KeeperStorage::createNode( const std::string & path, String data, - const KeeperStorage::Node::Stat & stat, + const Coordination::Stat & stat, Coordination::ACLs node_acls) { auto parent_path = parentNodePath(path); @@ -749,7 +800,7 @@ bool KeeperStorage::createNode( if (node_it == container.end()) return false; - if (node_it->value.stat.ephemeralOwner != 0) + if (node_it->value.isEphemeral()) return false; if (container.contains(path)) @@ -761,8 +812,8 @@ bool KeeperStorage::createNode( acl_map.addUsage(acl_id); created_node.acl_id = acl_id; - created_node.stat = stat; - created_node.setData(std::move(data)); + created_node.copyStats(stat); + created_node.setData(data); auto [map_key, _] = container.insert(path, created_node); /// Take child path from key owned by map. 
auto child_path = getBaseNodeName(map_key->getKey()); @@ -771,7 +822,7 @@ bool KeeperStorage::createNode( [child_path](KeeperStorage::Node & parent) { parent.addChild(child_path); - chassert(parent.stat.numChildren == static_cast(parent.getChildren().size())); + chassert(parent.numChildren() == static_cast(parent.getChildren().size())); } ); @@ -785,21 +836,22 @@ bool KeeperStorage::removeNode(const std::string & path, int32_t version) if (node_it == container.end()) return false; - if (version != -1 && version != node_it->value.stat.version) + if (version != -1 && version != node_it->value.version) return false; - if (node_it->value.stat.numChildren) + if (node_it->value.numChildren()) return false; - auto prev_node = node_it->value; - acl_map.removeUsage(prev_node.acl_id); + KeeperStorage::Node prev_node; + prev_node.shallowCopy(node_it->value); + acl_map.removeUsage(node_it->value.acl_id); container.updateValue( parentNodePath(path), [child_basename = getBaseNodeName(node_it->key)](KeeperStorage::Node & parent) { parent.removeChild(child_basename); - chassert(parent.stat.numChildren == static_cast(parent.getChildren().size())); + chassert(parent.numChildren() == static_cast(parent.getChildren().size())); } ); @@ -959,7 +1011,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr if (parent_node == nullptr) return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; - else if (parent_node->stat.ephemeralOwner != 0) + else if (parent_node->isEphemeral()) return {KeeperStorage::Delta{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; std::string path_created = request.path; @@ -968,7 +1020,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr if (request.not_exists) return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; - auto seq_num = parent_node->seq_num; + auto seq_num = parent_node->seqNum(); std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM seq_num_str.exceptions(std::ios::failbit); @@ -1008,20 +1060,20 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr auto parent_update = [parent_cversion, zxid](KeeperStorage::Node & node) { /// Increment sequential number even if node is not sequential - ++node.seq_num; + node.increaseSeqNum(); if (parent_cversion == -1) - ++node.stat.cversion; - else if (parent_cversion > node.stat.cversion) - node.stat.cversion = parent_cversion; + ++node.cversion; + else if (parent_cversion > node.cversion) + node.cversion = parent_cversion; - if (zxid > node.stat.pzxid) - node.stat.pzxid = zxid; - ++node.stat.numChildren; + if (zxid > node.pzxid) + node.pzxid = zxid; + node.increaseNumChildren(); }; new_deltas.emplace_back(std::string{parent_path}, zxid, KeeperStorage::UpdateNodeDelta{std::move(parent_update)}); - KeeperStorage::Node::Stat stat; + Coordination::Stat stat; stat.czxid = zxid; stat.mzxid = zxid; stat.pzxid = zxid; @@ -1135,7 +1187,8 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce else { node_it->value.setResponseStat(response.stat); - response.data = node_it->value.getData(); + auto data = node_it->value.getData(); + response.data = std::string(data.data, data.size); response.error = Coordination::Error::ZOK; } @@ -1192,8 +1245,8 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr { [zxid](KeeperStorage::Node & parent) { - if (parent.stat.pzxid < zxid) - parent.stat.pzxid = zxid; + if (parent.pzxid < zxid) + parent.pzxid = zxid; } } ); 
@@ -1207,9 +1260,9 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr update_parent_pzxid(); return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; } - else if (request.version != -1 && request.version != node->stat.version) + else if (request.version != -1 && request.version != node->version) return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; - else if (node->stat.numChildren != 0) + else if (node->numChildren() != 0) return {KeeperStorage::Delta{zxid, Coordination::Error::ZNOTEMPTY}}; if (request.restored_from_zookeeper_log) @@ -1220,14 +1273,14 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr zxid, KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) { - ++parent.stat.cversion; - --parent.stat.numChildren; + ++parent.cversion; + --parent.ephemeral_or_children_data.children_info.num_children; }}); - new_deltas.emplace_back(request.path, zxid, KeeperStorage::RemoveNodeDelta{request.version, node->stat.ephemeralOwner}); + new_deltas.emplace_back(request.path, zxid, KeeperStorage::RemoveNodeDelta{request.version, node->ephemeralOwner()}); - if (node->stat.ephemeralOwner != 0) - storage.unregisterEphemeralPath(node->stat.ephemeralOwner, request.path); + if (node->isEphemeral()) + storage.unregisterEphemeralPath(node->ephemeralOwner(), request.path); digest = storage.calculateNodesDigest(digest, new_deltas); @@ -1341,7 +1394,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce auto node = storage.uncommitted_state.getNode(request.path); - if (request.version != -1 && request.version != node->stat.version) + if (request.version != -1 && request.version != node->version) return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; new_deltas.emplace_back( @@ -1350,9 +1403,9 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce KeeperStorage::UpdateNodeDelta{ [zxid, data = request.data, time](KeeperStorage::Node & value) { - value.stat.version++; - value.stat.mzxid = zxid; - value.stat.mtime = time; + value.version++; + value.mzxid = zxid; + value.setMtime(time); value.setData(data); }, request.version}); @@ -1364,7 +1417,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce { [](KeeperStorage::Node & parent) { - parent.stat.cversion++; + parent.cversion++; } } ); @@ -1478,7 +1531,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if (child_it == container.end()) onStorageInconsistency(); - const auto is_ephemeral = child_it->value.stat.ephemeralOwner != 0; + const auto is_ephemeral = child_it->value.isEphemeral(); return (is_ephemeral && list_request_type == EPHEMERAL_ONLY) || (!is_ephemeral && list_request_type == PERSISTENT_ONLY); }; @@ -1531,7 +1584,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro auto node = storage.uncommitted_state.getNode(request.path); if (check_not_exists) { - if (node && (request.version == -1 || request.version == node->stat.version)) + if (node && (request.version == -1 || request.version == node->version)) return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; } else @@ -1539,7 +1592,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro if (!node) return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; - if (request.version != -1 && request.version != node->stat.version) + if (request.version != -1 && request.version != 
node->version) return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; } @@ -1575,7 +1628,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro if (check_not_exists) { - if (node_it != container.end() && (request.version == -1 || request.version == node_it->value.stat.version)) + if (node_it != container.end() && (request.version == -1 || request.version == node_it->value.version)) on_error(Coordination::Error::ZNODEEXISTS); else response.error = Coordination::Error::ZOK; @@ -1584,7 +1637,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro { if (node_it == container.end()) on_error(Coordination::Error::ZNONODE); - else if (request.version != -1 && request.version != node_it->value.stat.version) + else if (request.version != -1 && request.version != node_it->value.version) on_error(Coordination::Error::ZBADVERSION); else response.error = Coordination::Error::ZOK; @@ -1637,7 +1690,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr auto node = uncommitted_state.getNode(request.path); - if (request.version != -1 && request.version != node->stat.aversion) + if (request.version != -1 && request.version != node->aversion) return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; @@ -1657,7 +1710,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr zxid, KeeperStorage::UpdateNodeDelta { - [](KeeperStorage::Node & n) { ++n.stat.aversion; } + [](KeeperStorage::Node & n) { ++n.aversion; } } } }; @@ -2075,7 +2128,7 @@ UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vec [&](const CreateNodeDelta & create_delta) { auto node = std::make_shared(); - node->stat = create_delta.stat; + node->copyStats(create_delta.stat); node->setData(create_delta.data); updated_nodes.emplace(delta.path, node); }, @@ -2198,8 +2251,8 @@ void KeeperStorage::preprocessRequest( { [ephemeral_path](Node & parent) { - ++parent.stat.cversion; - --parent.stat.numChildren; + ++parent.cversion; + --parent.ephemeral_or_children_data.children_info.num_children; } } ); diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 01c1413a884..f14a6ed772c 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -35,40 +35,144 @@ public: /// New fields should be added to the struct only if it's really necessary struct Node { - /// to reduce size of the Node struct we use a custom Stat without dataLength - struct Stat - { - int64_t czxid{0}; - int64_t mzxid{0}; - int64_t ctime{0}; - int64_t mtime{0}; - int32_t version{0}; - int32_t cversion{0}; - int32_t aversion{0}; - int32_t numChildren{0}; /// NOLINT - int64_t ephemeralOwner{0}; /// NOLINT - int64_t pzxid{0}; - - bool operator==(const Stat &) const = default; - }; - + int64_t czxid{0}; + int64_t mzxid{0}; + int64_t pzxid{0}; uint64_t acl_id = 0; /// 0 -- no ACL by default - Stat stat{}; - int32_t seq_num = 0; + + mutable struct + { + bool has_cached_digest : 1; + int64_t ctime : 7; + } has_cached_digest_and_ctime{false, 0}; + + struct + { + bool is_ephemeral : 1; + int64_t mtime : 7; + } is_ephemeral_and_mtime{false, 0}; + + + union + { + int64_t ephemeral_owner; + struct + { + int32_t seq_num; + int32_t num_children; + } children_info; + } ephemeral_or_children_data{0}; + + char * data{nullptr}; + uint32_t data_size{0}; + + int32_t version{0}; + int32_t cversion{0}; + int32_t aversion{0}; /// we cannot use `std::optional because we want to /// 
pack the boolean with seq_num above - mutable bool has_cached_digest = false; mutable uint64_t cached_digest = 0; + ~Node() + { + if (data_size) + delete [] data; + } + + Node() = default; + + Node & operator=(const Node & other) + { + if (this == &other) + return *this; + + czxid = other.czxid; + mzxid = other.mzxid; + pzxid = other.pzxid; + acl_id = other.acl_id; + has_cached_digest_and_ctime = other.has_cached_digest_and_ctime; + is_ephemeral_and_mtime = other.is_ephemeral_and_mtime; + ephemeral_or_children_data = other.ephemeral_or_children_data; + data_size = other.data_size; + version = other.version; + cversion = other.cversion; + aversion = other.aversion; + + if (data_size != 0) + { + data = new char[data_size]; + memcpy(data, other.data, data_size); + } + return *this; + } + + Node(const Node & other) + { + *this = other; + } + + bool isEphemeral() const + { + + return is_ephemeral_and_mtime.is_ephemeral; + } + + int64_t ephemeralOwner() const + { + return isEphemeral() ? ephemeral_or_children_data.ephemeral_owner : 0; + } + + int32_t numChildren() const + { + return ephemeral_or_children_data.children_info.num_children; + } + + void increaseNumChildren() + { + ++ephemeral_or_children_data.children_info.num_children; + } + + int32_t seqNum() const + { + return ephemeral_or_children_data.children_info.seq_num; + } + + void increaseSeqNum() + { + ++ephemeral_or_children_data.children_info.seq_num; + } + + int64_t ctime() const + { + return has_cached_digest_and_ctime.ctime; + } + + void setCtime(uint64_t ctime) + { + has_cached_digest_and_ctime.ctime = ctime; + } + + int64_t mtime() const + { + return is_ephemeral_and_mtime.mtime; + } + + void setMtime(uint64_t mtime) + { + is_ephemeral_and_mtime.mtime = mtime; + } + + void copyStats(const Coordination::Stat & stat); + void setResponseStat(Coordination::Stat & response_stat) const; /// Object memory size uint64_t sizeInBytes() const; - void setData(String new_data); + void setData(const String & new_data); - const auto & getData() const noexcept { return data; } + StringRef getData() const noexcept { return {data, data_size}; } void addChild(StringRef child_path); @@ -87,7 +191,6 @@ public: // (e.g. 
we don't need to copy list of children) void shallowCopy(const Node & other); private: - String data; ChildrenSet children{}; }; @@ -177,7 +280,7 @@ public: // - quickly commit the changes to the storage struct CreateNodeDelta { - KeeperStorage::Node::Stat stat; + Coordination::Stat stat; Coordination::ACLs acls; String data; }; @@ -342,7 +445,7 @@ public: bool createNode( const std::string & path, String data, - const KeeperStorage::Node::Stat & stat, + const Coordination::Stat & stat, Coordination::ACLs node_acls); // Remove node in the storage diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index ac8d36745c2..716f08faf64 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -19,55 +19,47 @@ struct ListNode StringRef key; V value; - /// |* * ****** | - /// ^ ^ ^ - /// active_in_map free_key version - /// (1 byte) (1 byte) (6 bytes) - uint64_t node_metadata = 0; + struct + { + bool active_in_map : 1; + bool free_key : 1; + uint64_t version : 6; + } node_metadata{false, false, 0}; void setInactiveInMap() { - node_metadata &= ~active_in_map_mask; + node_metadata.active_in_map = false; } void setActiveInMap() { - node_metadata |= active_in_map_mask; + node_metadata.active_in_map = true; } bool isActiveInMap() { - return node_metadata & active_in_map_mask; + return node_metadata.active_in_map; } void setFreeKey() { - node_metadata |= free_key_mask; + node_metadata.free_key = true; } bool getFreeKey() { - return node_metadata & free_key_mask; + return node_metadata.free_key; } uint64_t getVersion() { - return node_metadata & version_mask; + return node_metadata.version; } void setVersion(uint64_t version) { - if (version > version_mask) - throw Exception( - ErrorCodes::LOGICAL_ERROR, "Snapshot version {} is larger than maximum allowed value {}", version, version_mask); - - node_metadata &= ~version_mask; - node_metadata |= version; + node_metadata.version = version; } - - static constexpr uint64_t active_in_map_mask = static_cast(1) << 63; - static constexpr uint64_t free_key_mask = static_cast(1) << 62; - static constexpr uint64_t version_mask = ~(static_cast(3) << 62); }; template diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index b55ebef327f..b4334893849 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -105,7 +105,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L Coordination::read(node.acl_id, in); /// Deserialize stat - Coordination::read(node.stat.czxid, in); + Coordination::read(node.czxid, in); Coordination::read(node.stat.mzxid, in); /// For some reason ZXID specified in filename can be smaller /// then actual zxid from nodes. In this case we will use zxid from nodes.
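The Node and ListNode changes above shrink per-node memory by replacing separate fields and hand-rolled bit masks with bit-fields, and by overlapping mutually exclusive data (ephemeral owner vs. children counters) in a union. Below is a minimal standalone sketch of that packing idea; the field widths and surrounding layout are illustrative only and simplified, not the real KeeperStorage::Node.

#include <cstdint>
#include <cstdio>

/// Simplified, hypothetical node: flags share a word with a timestamp via bit-fields,
/// and the ephemeral/children data share storage through a union, mirroring the patch above.
struct PackedNode
{
    struct
    {
        bool is_ephemeral : 1;
        uint64_t mtime : 63;   /// widths chosen for illustration only
    } flags{false, 0};

    union
    {
        int64_t ephemeral_owner;   /// valid when is_ephemeral is true
        struct
        {
            int32_t seq_num;       /// valid otherwise
            int32_t num_children;
        } children_info;
    } ephemeral_or_children{0};

    int64_t ephemeralOwner() const { return flags.is_ephemeral ? ephemeral_or_children.ephemeral_owner : 0; }
};

/// Naive layout with one field per value, kept only for the size comparison.
struct NaiveNode
{
    bool is_ephemeral = false;
    int64_t mtime = 0;
    int64_t ephemeral_owner = 0;
    int32_t seq_num = 0;
    int32_t num_children = 0;
};

int main()
{
    std::printf("naive: %zu bytes, packed: %zu bytes\n", sizeof(NaiveNode), sizeof(PackedNode));
    return 0;
}

The trade-off is that the discriminating flag must be checked before reading the union, which is why the patch exposes helpers such as isEphemeral(), ephemeralOwner() and numChildren() instead of the raw fields.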
From 8c7218bac2fa09356750e23e79ed686c879665b6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 22 Jan 2024 09:40:19 +0000 Subject: [PATCH 140/884] Store latest logs inmemory --- src/Coordination/Changelog.cpp | 393 +++++++++++++++++++++++++-------- src/Coordination/Changelog.h | 48 +++- 2 files changed, 341 insertions(+), 100 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 7f1135eec94..c06a8bad91a 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -49,9 +50,15 @@ void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr descrip } disk_from->copyFile(from_path, *disk_to, path_to, {}); disk_to->removeFile(tmp_changelog_name); + + /// a different thread could be trying to read from the file + /// we should make sure the source disk contains the file while read is in progress + { + std::lock_guard file_lock(description->file_mutex); + description->disk = disk_to; + } disk_from->removeFile(description->path); description->path = path_to; - description->disk = disk_to; } constexpr auto DEFAULT_PREFIX = "changelog"; @@ -111,9 +118,11 @@ class ChangelogWriter public: ChangelogWriter( std::map & existing_changelogs_, + LogEntryStorage & entry_storage_, KeeperContextPtr keeper_context_, LogFileSettings log_file_settings_) : existing_changelogs(existing_changelogs_) + , entry_storage(entry_storage_) , log_file_settings(log_file_settings_) , keeper_context(std::move(keeper_context_)) , log(&Poco::Logger::get("Changelog")) @@ -238,6 +247,7 @@ public: } auto & write_buffer = getBuffer(); + auto current_position = write_buffer.count(); writeIntBinary(computeRecordChecksum(record), write_buffer); writeIntBinary(record.header.version, write_buffer); @@ -255,6 +265,11 @@ public: /// Flush compressed data to file buffer compressed_buffer->next(); } + else + { + unflushed_indices_with_log_location.emplace_back( + record.header.index, LogLocation{.file_description = current_file_description, .position = current_position}); + } last_index_written = record.header.index; @@ -272,6 +287,8 @@ public: else file_buffer->next(); } + entry_storage.addLogLocations(std::move(unflushed_indices_with_log_location)); + unflushed_indices_with_log_location.clear(); } uint64_t getStartIndex() const @@ -314,9 +331,9 @@ public: private: void finalizeCurrentFile() { - assert(prealloc_done); + chassert(prealloc_done); - assert(current_file_description); + chassert(current_file_description); // compact can delete the file and we don't need to do anything if (current_file_description->deleted) { @@ -400,9 +417,11 @@ private: { const auto * file_buffer = tryGetFileBuffer(); + if (file_buffer) + initial_file_size = getSizeFromFileDescriptor(file_buffer->getFD()); + if (log_file_settings.max_size == 0 || !file_buffer) { - initial_file_size = 0; prealloc_done = true; return; } @@ -428,7 +447,6 @@ private: } } #endif - initial_file_size = getSizeFromFileDescriptor(file_buffer->getFD()); prealloc_done = true; } @@ -441,6 +459,10 @@ private: std::map & existing_changelogs; + LogEntryStorage & entry_storage; + + std::vector> unflushed_indices_with_log_location; + ChangelogFileDescriptionPtr current_file_description{nullptr}; std::unique_ptr file_buf; std::optional last_index_written; @@ -482,69 +504,88 @@ struct ChangelogReadResult bool error; }; +namespace +{ + +ChangelogRecord readChangelogRecord(ReadBuffer & read_buf, const std::string & filepath) +{ + /// Read checksum + 
Checksum record_checksum; + readIntBinary(record_checksum, read_buf); + + /// Read header + ChangelogRecord record; + readIntBinary(record.header.version, read_buf); + readIntBinary(record.header.index, read_buf); + readIntBinary(record.header.term, read_buf); + readIntBinary(record.header.value_type, read_buf); + readIntBinary(record.header.blob_size, read_buf); + + if (record.header.version > CURRENT_CHANGELOG_VERSION) + throw Exception( + ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", static_cast(record.header.version), filepath); + + /// Read data + if (record.header.blob_size != 0) + { + auto buffer = nuraft::buffer::alloc(record.header.blob_size); + auto * buffer_begin = reinterpret_cast(buffer->data_begin()); + read_buf.readStrict(buffer_begin, record.header.blob_size); + record.blob = buffer; + } + else + record.blob = nullptr; + + /// Compare checksums + Checksum checksum = computeRecordChecksum(record); + if (checksum != record_checksum) + { + throw Exception( + ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Checksums doesn't match for log {} (version {}), index {}, blob_size {}", + filepath, + record.header.version, + record.header.index, + record.header.blob_size); + } + + return record; +} + +LogEntryPtr logEntryFromRecord(const ChangelogRecord & record) +{ + return nuraft::cs_new(record.header.term, record.blob, static_cast(record.header.value_type)); +} + +} + class ChangelogReader { public: - explicit ChangelogReader(DiskPtr disk_, const std::string & filepath_) : disk(disk_), filepath(filepath_) + explicit ChangelogReader(ChangelogFileDescriptionPtr changelog_description_) : changelog_description(changelog_description_) { - compression_method = chooseCompressionMethod(filepath, ""); - auto read_buffer_from_file = disk->readFile(filepath); + compression_method = chooseCompressionMethod(changelog_description->path, ""); + auto read_buffer_from_file = changelog_description->disk->readFile(changelog_description->path); read_buf = wrapReadBufferWithCompressionMethod(std::move(read_buffer_from_file), compression_method); } /// start_log_index -- all entries with index < start_log_index will be skipped, but accounted into total_entries_read_from_log - ChangelogReadResult readChangelog(IndexToLogEntry & logs, uint64_t start_log_index, Poco::Logger * log) + ChangelogReadResult readChangelog(LogEntryStorage & entry_storage, uint64_t start_log_index, Poco::Logger * log) { ChangelogReadResult result{}; result.compressed_log = compression_method != CompressionMethod::None; + const auto & filepath = changelog_description->path; try { while (!read_buf->eof()) { result.last_position = read_buf->count(); - /// Read checksum - Checksum record_checksum; - readIntBinary(record_checksum, *read_buf); - /// Read header - ChangelogRecord record; - readIntBinary(record.header.version, *read_buf); - readIntBinary(record.header.index, *read_buf); - readIntBinary(record.header.term, *read_buf); - readIntBinary(record.header.value_type, *read_buf); - readIntBinary(record.header.blob_size, *read_buf); - - if (record.header.version > CURRENT_CHANGELOG_VERSION) - throw Exception( - ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", static_cast(record.header.version), filepath); - - /// Read data - if (record.header.blob_size != 0) - { - auto buffer = nuraft::buffer::alloc(record.header.blob_size); - auto * buffer_begin = reinterpret_cast(buffer->data_begin()); - read_buf->readStrict(buffer_begin, record.header.blob_size); - record.blob = buffer; 
- } - else - record.blob = nullptr; - - /// Compare checksums - Checksum checksum = computeRecordChecksum(record); - if (checksum != record_checksum) - { - throw Exception( - ErrorCodes::CHECKSUM_DOESNT_MATCH, - "Checksums doesn't match for log {} (version {}), index {}, blob_size {}", - filepath, - record.header.version, - record.header.index, - record.header.blob_size); - } + auto record = readChangelogRecord(*read_buf, filepath); /// Check for duplicated changelog ids - if (logs.contains(record.header.index)) - std::erase_if(logs, [&record](const auto & item) { return item.first >= record.header.index; }); + if (entry_storage.contains(record.header.index)) + entry_storage.eraseIf([&record](const auto index) { return index >= record.header.index; }); result.total_entries_read_from_log += 1; @@ -553,12 +594,15 @@ public: continue; /// Create log entry for read data - auto log_entry = nuraft::cs_new(record.header.term, record.blob, static_cast(record.header.value_type)); + auto log_entry = logEntryFromRecord(record); if (result.first_read_index == 0) result.first_read_index = record.header.index; /// Put it into in memory structure - logs.emplace(record.header.index, log_entry); + entry_storage.addEntryWithLocation( + record.header.index, + log_entry, + LogLocation{.file_description = changelog_description, .position = static_cast(result.last_position)}); result.last_read_index = record.header.index; if (result.total_entries_read_from_log % 50000 == 0) @@ -585,12 +629,189 @@ public: } private: - DiskPtr disk; - std::string filepath; + ChangelogFileDescriptionPtr changelog_description; CompressionMethod compression_method; std::unique_ptr read_buf; }; +size_t LogEntryStorage::size() const +{ + return total_entries; +} + +void LogEntryStorage::addEntry(uint64_t index, const LogEntryPtr & log_entry) +{ + logs_cache.insert_or_assign(index, log_entry); + if (logs_cache.size() == 1) + min_index_in_cache = index; + + ++total_entries; +} + +void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location) +{ + logs_cache.emplace(index, log_entry); + logs_location.emplace(index, std::move(log_location)); + if (logs_cache.size() == 1) + min_index_in_cache = index; + else if (logs_cache.size() > 1000) + { + logs_cache.erase(min_index_in_cache); + ++min_index_in_cache; + } +} + +void LogEntryStorage::eraseIf(std::function index_predicate) +{ + std::erase_if(logs_cache, [&](const auto & item) { return index_predicate(item.first); }); +} + +bool LogEntryStorage::contains(uint64_t index) const +{ + return logs_cache.contains(index); +} + +LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const +{ + if (index >= min_index_in_cache) + return logs_cache.at(index); + + std::lock_guard lock(logs_location_mutex); + + if (auto it = logs_location.find(index); it != logs_location.end()) + { + const auto & [changelog_description, position] = it->second; + std::lock_guard file_lock(changelog_description->file_mutex); + //std::cout << "Reading from path " << changelog_description->path << std::endl; + auto file = changelog_description->disk->readFile(changelog_description->path); + file->seek(position, SEEK_SET); + + auto record = readChangelogRecord(*file, changelog_description->path); + return logEntryFromRecord(record); + } + else + std::cout << "Nothing found" << std::endl; + + return nullptr; +} + +void LogEntryStorage::clear() +{ + logs_cache.clear(); +} + +LogEntryPtr LogEntryStorage::getLatestConfigChange() const +{ + for (const auto & [_, entry] : 
logs_cache) + if (entry->get_val_type() == nuraft::conf) + return entry; + return nullptr; +} + +void LogEntryStorage::addLogLocations(std::vector> indices_with_log_locations) +{ + std::lock_guard lock(logs_location_mutex); + unapplied_indices_with_log_locations.insert( + unapplied_indices_with_log_locations.end(), + std::make_move_iterator(indices_with_log_locations.begin()), + std::make_move_iterator(indices_with_log_locations.end())); +} + +void LogEntryStorage::refreshCache() +{ + if (logs_cache.size() <= 1000) + return; + + std::lock_guard lock(logs_location_mutex); + if (logs_location.empty()) + return; + + auto max_index_to_remove = min_index_in_cache + (logs_cache.size() - 1000); + for (auto & [index, log_location] : unapplied_indices_with_log_locations) + { + logs_location.emplace(index, std::move(log_location)); + max_index_with_location = index; + } + + for (size_t index = min_index_in_cache; index < max_index_to_remove; ++index) + { + if (index <= max_index_with_location) + { + logs_cache.erase(index); + min_index_in_cache = index + 1; + } + } + + unapplied_indices_with_log_locations.clear(); +} + +LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end) const +{ + LogEntriesPtr ret = nuraft::cs_new>>(); + ret->reserve(end - start); + + /// we rely on fact that changelogs need to be written sequentially with + /// no other writes between + struct ReadInfo + { + ChangelogFileDescriptionPtr file_description; + size_t start_position = 0; + size_t count = 0; + }; + + /// we have to collect some logs from disks because they are not cached + if (start < min_index_in_cache) + { + //std::cout << "Reading some from disk" << std::endl; + std::lock_guard logs_location_lock(logs_location_mutex); + std::vector read_infos; + for (uint64_t i = start; i < min_index_in_cache && i < end; ++i) + { + const auto & log_location = logs_location.at(i); + const auto push_new_file = [&] + { + read_infos.push_back(ReadInfo + { + .file_description = log_location.file_description, + .start_position = log_location.position, + .count = 1, + }); + }; + + if (read_infos.empty()) + push_new_file(); + else if (auto & last = read_infos.back(); log_location.file_description == last.file_description) + ++last.count; + else + push_new_file(); + } + + for (const auto & [file_description, start_position, count] : read_infos) + { + std::cout << "Reading from path " << file_description->path << " " << count << " entries" << std::endl; + std::lock_guard file_lock(file_description->file_mutex); + auto file = file_description->disk->readFile(file_description->path); + file->seek(start_position, SEEK_SET); + + for (size_t i = 0; i < count; ++i) + { + auto record = readChangelogRecord(*file, file_description->path); + ret->push_back(logEntryFromRecord(record)); + } + } + + start = min_index_in_cache; + } + else + std::cout << "Nothing read from disk" << std::endl; + + for (uint64_t i = start; i < end; ++i) + ret->push_back(logs_cache.at(i)); + + return ret; + +} + Changelog::Changelog( Poco::Logger * log_, LogFileSettings log_file_settings, FlushSettings flush_settings_, KeeperContextPtr keeper_context_) : changelogs_detached_dir("detached") @@ -706,7 +927,7 @@ Changelog::Changelog( append_completion_thread = ThreadFromGlobalPool([this] { appendCompletionThread(); }); - current_writer = std::make_unique(existing_changelogs, keeper_context, log_file_settings); + current_writer = std::make_unique(existing_changelogs, entry_storage, keeper_context, log_file_settings); } void 
Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) @@ -783,8 +1004,8 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin break; } - ChangelogReader reader(changelog_description.disk, changelog_description.path); - last_log_read_result = reader.readChangelog(logs, start_to_read_from, log); + ChangelogReader reader(changelog_description_ptr); + last_log_read_result = reader.readChangelog(entry_storage, start_to_read_from, log); if (last_log_read_result->last_read_index != 0) last_read_index = last_log_read_result->last_read_index; @@ -861,13 +1082,13 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin remove_invalid_logs(); description->disk->removeFile(description->path); existing_changelogs.erase(last_log_read_result->log_start_index); - std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first >= last_log_read_result->log_start_index; }); + entry_storage.eraseIf([last_log_read_result](const auto index) { return index >= last_log_read_result->log_start_index; }); } else if (last_log_read_result->error) { LOG_INFO(log, "Chagelog {} read finished with error but some logs were read from it, file will not be removed", description->path); remove_invalid_logs(); - std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first > last_log_read_result->last_read_index; }); + entry_storage.eraseIf([last_log_read_result](const auto index) { return index > last_log_read_result->last_read_index; }); move_from_latest_logs_disks(existing_changelogs.at(last_log_read_result->log_start_index)); } /// don't mix compressed and uncompressed writes @@ -902,7 +1123,6 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin moveFileBetweenDisks(description->disk, description, disk, description->path); } - initialized = true; } @@ -1006,14 +1226,14 @@ void Changelog::removeAllLogsAfter(uint64_t remove_after_log_start_index) LOG_WARNING(log, "Removing changelogs that go after broken changelog entry"); removeExistingLogs(start_to_remove_from_itr, existing_changelogs.end()); - std::erase_if(logs, [start_to_remove_from_log_id](const auto & item) { return item.first >= start_to_remove_from_log_id; }); + entry_storage.eraseIf([start_to_remove_from_log_id](const auto index) { return index >= start_to_remove_from_log_id; }); } void Changelog::removeAllLogs() { LOG_WARNING(log, "Removing all changelogs"); removeExistingLogs(existing_changelogs.begin(), existing_changelogs.end()); - logs.clear(); + entry_storage.clear(); } ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_entry) @@ -1157,10 +1377,10 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records"); - if (logs.empty()) + if (min_log_id == 0) min_log_id = index; - logs[index] = log_entry; + entry_storage.addEntry(index, log_entry); max_log_id = index; if (!write_operations.push(AppendLog{index, log_entry})) @@ -1207,7 +1427,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) /// Remove redundant logs from memory /// Everything >= index must be removed - std::erase_if(logs, [index](const auto & item) { return item.first >= index; }); + entry_storage.eraseIf([index](const auto current_index) { return current_index >= index; }); /// Now we can actually override entry at index appendEntry(index, log_entry); 
@@ -1276,7 +1496,8 @@ void Changelog::compact(uint64_t up_to_log_index) } /// Compaction from the past is possible, so don't make our min_log_id smaller. min_log_id = std::max(min_log_id, up_to_log_index + 1); - std::erase_if(logs, [up_to_log_index](const auto & item) { return item.first <= up_to_log_index; }); + + entry_storage.eraseIf([up_to_log_index](const auto index) { return index <= up_to_log_index; }); if (need_rotate) current_writer->rotate(up_to_log_index + 1); @@ -1289,46 +1510,26 @@ LogEntryPtr Changelog::getLastEntry() const /// This entry treaded in special way by NuRaft static LogEntryPtr fake_entry = nuraft::cs_new(0, nuraft::buffer::alloc(sizeof(uint64_t))); - auto entry = logs.find(max_log_id); - if (entry == logs.end()) - { + auto entry = entry_storage.getEntry(max_log_id); + if (entry == nullptr) return fake_entry; - } - return entry->second; + return entry; } LogEntriesPtr Changelog::getLogEntriesBetween(uint64_t start, uint64_t end) { - LogEntriesPtr ret = nuraft::cs_new>>(); - - ret->resize(end - start); - uint64_t result_pos = 0; - for (uint64_t i = start; i < end; ++i) - { - (*ret)[result_pos] = entryAt(i); - result_pos++; - } - return ret; + return entry_storage.getLogEntriesBetween(start, end); } LogEntryPtr Changelog::entryAt(uint64_t index) { - nuraft::ptr src = nullptr; - auto entry = logs.find(index); - if (entry == logs.end()) - return nullptr; - - src = entry->second; - return src; + return entry_storage.getEntry(index); } LogEntryPtr Changelog::getLatestConfigChange() const { - for (const auto & [_, entry] : logs) - if (entry->get_val_type() == nuraft::conf) - return entry; - return nullptr; + return entry_storage.getLatestConfigChange(); } nuraft::ptr Changelog::serializeEntriesToBuffer(uint64_t index, int32_t count) @@ -1339,11 +1540,11 @@ nuraft::ptr Changelog::serializeEntriesToBuffer(uint64_t index, uint64_t size_total = 0; for (uint64_t i = index; i < index + count; ++i) { - auto entry = logs.find(i); - if (entry == logs.end()) + auto entry = entry_storage.getEntry(i); + if (entry == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Don't have log entry {}", i); - nuraft::ptr buf = entry->second->serialize(); + nuraft::ptr buf = entry->serialize(); size_total += buf->size(); returned_logs.push_back(std::move(buf)); } @@ -1374,7 +1575,7 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer) buffer.get(buf_local); LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local); - if (i == 0 && logs.contains(cur_index)) + if (i == 0 && entry_storage.contains(cur_index)) writeAt(cur_index, log_entry); else appendEntry(cur_index, log_entry); @@ -1409,6 +1610,8 @@ std::shared_ptr Changelog::flushAsync() LOG_WARNING(log, "Changelog is shut down"); return nullptr; } + + entry_storage.refreshCache(); return failed; } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 20f850e3f62..ee212ef3a71 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -23,7 +22,6 @@ using LogEntries = std::vector; using LogEntriesPtr = nuraft::ptr; using BufferPtr = nuraft::ptr; -using IndexToOffset = std::unordered_map; using IndexToLogEntry = std::unordered_map; enum class ChangelogVersion : uint8_t @@ -63,6 +61,8 @@ struct ChangelogFileDescription DiskPtr disk; std::string path; + std::mutex file_mutex; + bool deleted = false; /// How many entries should be stored in this log @@ -87,6 +87,43 @@ struct FlushSettings uint64_t 
max_flush_batch_size = 1000; }; +struct LogLocation +{ + ChangelogFileDescriptionPtr file_description; + size_t position; +}; + +struct LogEntryStorage +{ + size_t size() const; + + void addEntry(uint64_t index, const LogEntryPtr & log_entry); + void addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location); + void eraseIf(std::function index_predicate); + bool contains(uint64_t index) const; + LogEntryPtr getEntry(uint64_t index) const; + void clear(); + LogEntryPtr getLatestConfigChange() const; + + using IndexWithLogLocation = std::pair; + + void addLogLocations(std::vector indices_with_log_locations); + + void refreshCache(); + + LogEntriesPtr getLogEntriesBetween(uint64_t start, uint64_t end) const; +private: + /// Mapping log_id -> log_entry + IndexToLogEntry logs_cache; + size_t min_index_in_cache = 0; + + size_t total_entries = 0; + mutable std::mutex logs_location_mutex; + std::vector unapplied_indices_with_log_locations; + std::unordered_map logs_location; + size_t max_index_with_location = 0; +}; + /// Simplest changelog with files rotation. /// No compression, no metadata, just entries with headers one by one. /// Able to read broken files/entries and discard them. Not thread safe. @@ -143,7 +180,7 @@ public: void shutdown(); - uint64_t size() const { return logs.size(); } + uint64_t size() const { return entry_storage.size(); } uint64_t lastDurableIndex() const { @@ -190,8 +227,9 @@ private: std::mutex writer_mutex; /// Current writer for changelog file std::unique_ptr current_writer; - /// Mapping log_id -> log_entry - IndexToLogEntry logs; + + LogEntryStorage entry_storage; + /// Start log_id which exists in all "active" logs /// min_log_id + 1 == max_log_id means empty log storage for NuRaft uint64_t min_log_id = 0; From 09f1e2840c2859a517c9f76183a7abd51c488b6f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 25 Jan 2024 10:06:05 +0100 Subject: [PATCH 141/884] Simplified calculatePartSize and upload task --- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 2 - .../AzureBlobStorage/AzureObjectStorage.h | 4 - .../copyAzureBlobStorageFile.cpp | 114 +++++------------- 3 files changed, 27 insertions(+), 93 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index cbc2996f5c1..02b0d5bb599 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -165,9 +165,7 @@ std::unique_ptr getAzureBlobStorageSettings(const Po config.getInt(config_prefix + ".max_single_read_retries", 3), config.getInt(config_prefix + ".max_single_download_retries", 3), config.getInt(config_prefix + ".list_object_keys_size", 1000), - config.getUInt64(config_prefix + ".min_upload_part_size", 16 * 1024 * 1024), config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024), - config.getUInt64(config_prefix + ".max_part_number", 10000), config.getBool(config_prefix + ".use_native_copy", false) ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 3be4989d4f2..30fedb601dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -24,18 +24,14 @@ struct AzureObjectStorageSettings int max_single_read_retries_, int max_single_download_retries_, int 
list_object_keys_size_, - size_t min_upload_part_size_, size_t max_upload_part_size_, - size_t max_part_number_, bool use_native_copy_) : max_single_part_upload_size(max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) , max_single_read_retries(max_single_read_retries_) , max_single_download_retries(max_single_download_retries_) , list_object_keys_size(list_object_keys_size_) - , min_upload_part_size(min_upload_part_size_) , max_upload_part_size(max_upload_part_size_) - , max_part_number(max_part_number_) , use_native_copy(use_native_copy_) { } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 350d2d1d34e..e5517a1a021 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -84,17 +84,10 @@ namespace struct UploadPartTask { - char *data = nullptr; - size_t size = 0; - std::string block_id; + std::unique_ptr read_buffer = nullptr; + std::vector block_ids; bool is_finished = false; std::exception_ptr exception; - - ~UploadPartTask() - { - if (data != nullptr) - free(data); - } }; size_t normal_part_size; @@ -108,56 +101,11 @@ namespace void calculatePartSize() { - if (!total_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. This must not happen"); - - auto max_part_number = settings.get()->max_part_number; - auto min_upload_part_size = settings.get()->min_upload_part_size; auto max_upload_part_size = settings.get()->max_upload_part_size; - - if (!max_part_number) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); - else if (!min_upload_part_size) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "min_upload_part_size must not be 0"); - else if (max_upload_part_size < min_upload_part_size) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be less than min_upload_part_size"); - - size_t part_size = min_upload_part_size; - size_t num_parts = (total_size + part_size - 1) / part_size; - - if (num_parts > max_part_number) - { - part_size = (total_size + max_part_number - 1) / max_part_number; - num_parts = (total_size + part_size - 1) / part_size; - } - - if (part_size > max_upload_part_size) - { - part_size = max_upload_part_size; - num_parts = (total_size + part_size - 1) / part_size; - } - - if (num_parts < 1 || num_parts > max_part_number || part_size < min_upload_part_size || part_size > max_upload_part_size) - { - String msg; - if (num_parts < 1) - msg = "Number of parts is zero"; - else if (num_parts > max_part_number) - msg = fmt::format("Number of parts exceeds {}", num_parts, max_part_number); - else if (part_size < min_upload_part_size) - msg = fmt::format("Size of a part is less than {}", part_size, min_upload_part_size); - else - msg = fmt::format("Size of a part exceeds {}", part_size, max_upload_part_size); - - throw Exception( - ErrorCodes::INVALID_CONFIG_PARAMETER, - "{} while writing {} bytes to AzureBlobStorage. Check max_part_number = {}, " - "min_upload_part_size = {}, max_upload_part_size = {}", - msg, total_size, max_part_number, min_upload_part_size, max_upload_part_size); - } - + if (!max_upload_part_size) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be 0"); /// We've calculated the size of a normal part (the final part can be smaller). 
- normal_part_size = part_size; + normal_part_size = max_upload_part_size; } public: @@ -238,18 +186,13 @@ namespace try { - auto read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); - task->data = new char[part_size]; - task->size = part_size; - size_t n = read_buffer->read(task->data,part_size); - if (n != part_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size"); + task->read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); schedule([this, task, task_finish_notify]() { try { - processUploadTask(*task); + processUploadPartRequest(*task); } catch (...) { @@ -267,38 +210,35 @@ namespace else { UploadPartTask task; - auto read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); - task.data = new char[part_size]; - size_t n = read_buffer->read(task.data,part_size); - if (n != part_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size"); - task.size = part_size; - processUploadTask(task); - block_ids.emplace_back(task.block_id); + task.read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); + processUploadPartRequest(task); + block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end()); } } - void processUploadTask(UploadPartTask & task) - { - auto block_id = processUploadPartRequest(task); - - std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race - task.block_id = block_id; - LOG_TRACE(log, "Writing part finished. Container: {}, Blob: {}, block_id: {}, Parts: {}", dest_container_for_logging, dest_blob, block_id, bg_tasks.size()); - } - - String processUploadPartRequest(UploadPartTask & task) + void processUploadPartRequest(UploadPartTask & task) { ProfileEvents::increment(ProfileEvents::AzureUploadPart); if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); auto block_blob_client = client.get()->GetBlockBlobClient(dest_blob); - task.block_id = getRandomASCIIString(64); - Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast(task.data), task.size); - block_blob_client.StageBlock(task.block_id, memory); - return task.block_id; + while (!task.read_buffer->eof()) + { + auto size = task.read_buffer->available(); + if (size > 0) + { + auto block_id = getRandomASCIIString(64); + Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast(task.read_buffer->position()), size); + block_blob_client.StageBlock(block_id, memory); + task.block_ids.emplace_back(block_id); + task.read_buffer->ignore(size); + LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, block_id: {}", dest_container_for_logging, dest_blob, block_id); + } + } + std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race + LOG_TRACE(log, "Writing part finished. 
Container: {}, Blob: {}, Parts: {}", dest_container_for_logging, dest_blob, bg_tasks.size()); } @@ -316,7 +256,7 @@ namespace { if (task.exception) std::rethrow_exception(task.exception); - block_ids.emplace_back(task.block_id); + block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end()); } } }; From 288d288b8766f75670c82f9f4f190591f2ba7332 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 25 Jan 2024 19:57:51 +0200 Subject: [PATCH 142/884] fix failing 00937_template_output_format --- src/Core/Settings.h | 2 +- src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp | 8 ++++---- .../0_stateless/00937_format_schema_rows_template.sh | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d0a327e2d44..f9e3f401d98 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1080,7 +1080,7 @@ class IColumn; M(String, format_schema, "", "Schema identifier (used by schema-based formats)", 0) \ M(String, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \ M(String, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \ - M(String, format_schema_rows_template, "\n", "Format string for rows (for Template format)", 0) \ + M(String, format_schema_rows_template, "", "Format string for rows (for Template format)", 0) \ M(String, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \ \ M(EscapingRule, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \ diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 99a7f59c09e..efda754917b 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -226,14 +226,14 @@ void registerOutputFormatTemplate(FormatFactory & factory) } else { - if (!settings.template_settings.row_format_schema.empty()) - { - throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template, but not both"); - } row_format = ParsedTemplateFormatString( FormatSchemaInfo(settings.template_settings.row_format, "Template", false, settings.schema.is_server, settings.schema.format_schema_path), idx_by_name); + if (!settings.template_settings.row_format_schema.empty()) + { + throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template, but not both"); + } } return std::make_shared(sample, buf, settings, resultset_format, row_format, settings.template_settings.row_between_delimiter); }); diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.sh b/tests/queries/0_stateless/00937_format_schema_rows_template.sh index 8b512513d94..6161f71e78e 100755 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.sh +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE template (question String, answer Strin $CLICKHOUSE_CLIENT --query="INSERT INTO template VALUES ('How awesome is clickhouse?', 'unbelievably awesome!', 456, '2016-01-02'),\ ('How fast is clickhouse?', 'Lightning fast!', 9876543210, '2016-01-03'),\ -('Is it opensource', 'of course it is!', 789, '2016-01-04')"; +('Is it opensource?', 'of course it is!', 789, '2016-01-04')"; 
$CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ @@ -21,7 +21,6 @@ format_template_rows_between_delimiter = ';\n'"; echo -e "\n" # Test that if both format_schema_rows_template setting and format_template_row are provided, error is thrown - echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Likes: ${likes:Raw}, Date: ${date:Raw}' > "$CURDIR"/00937_template_output_format_row.tmp $CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ From a74c78c4f0cb0ff30014afe2edd78efca3450f49 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 25 Jan 2024 20:56:23 +0200 Subject: [PATCH 143/884] fix failing test 00937_format_schema_rows_template.sh --- .../queries/0_stateless/00937_format_schema_rows_template.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.sh b/tests/queries/0_stateless/00937_format_schema_rows_template.sh index 6161f71e78e..aff5de3b555 100755 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.sh +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.sh @@ -22,10 +22,10 @@ echo -e "\n" # Test that if both format_schema_rows_template setting and format_template_row are provided, error is thrown echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Likes: ${likes:Raw}, Date: ${date:Raw}' > "$CURDIR"/00937_template_output_format_row.tmp -$CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ +$CLICKHOUSE_CLIENT --multiline --multiquery --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ -format_template_rows_between_delimiter = ';\n'; --{ serverError 474 }" +format_template_rows_between_delimiter = ';\n'; --{clientError 474}" $CLICKHOUSE_CLIENT --query="DROP TABLE template"; rm "$CURDIR"/00937_template_output_format_row.tmp From 64d18ad8e706e859e1ddd9398ec0a96ae088e07c Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 25 Jan 2024 22:04:21 +0200 Subject: [PATCH 144/884] CI trigger --- .../0_stateless/00937_format_schema_rows_template.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.reference b/tests/queries/0_stateless/00937_format_schema_rows_template.reference index 167f16ec55f..5f59cca2629 100644 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.reference +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.reference @@ -1,4 +1,4 @@ Question: 'How awesome is clickhouse?', Answer: 'unbelievably awesome!', Number of Likes: 456, Date: 2016-01-02; Question: 'How fast is clickhouse?', Answer: 'Lightning fast!', Number of Likes: 9876543210, Date: 2016-01-03; -Question: 'Is it opensource', Answer: 'of course it is!', Number of Likes: 789, 
Date: 2016-01-04 +Question: 'Is it opensource?', Answer: 'of course it is!', Number of Likes: 789, Date: 2016-01-04 From 36055bd0089f52473f893d71c475a2782a45e8b4 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 25 Jan 2024 21:44:46 +0000 Subject: [PATCH 145/884] init --- src/Functions/FunctionBinaryArithmetic.h | 106 ++++++++++++++---- src/Functions/IsOperation.h | 4 +- .../02975_intdiv_with_decimal.reference | 52 +++++++++ .../0_stateless/02975_intdiv_with_decimal.sql | 54 +++++++++ 4 files changed, 196 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/02975_intdiv_with_decimal.reference create mode 100644 tests/queries/0_stateless/02975_intdiv_with_decimal.sql diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 1b2519d1ec5..e34514d15fd 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -146,10 +146,24 @@ private: /// it's not correct for Decimal public: static constexpr bool allow_decimal = IsOperation::allow_decimal; + static constexpr bool only_integer = IsOperation::div_int || IsOperation::div_int_or_zero; /// Appropriate result type for binary operator on numeric types. "Date" can also mean /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). using ResultDataType = Switch< + /// Result must be Integer + Case< + only_integer && IsDataTypeDecimal && IsDataTypeDecimal, + Switch< + Case || std::is_same_v, DataTypeInt256>, + Case || std::is_same_v, DataTypeInt128>, + Case || std::is_same_v, DataTypeInt64>, + Case || std::is_same_v, DataTypeInt32>>>, + Case< + only_integer, + Switch< + Case, LeftDataType>, + Case, RightDataType>>>, /// Decimal cases Case || IsDataTypeDecimal), InvalidType>, Case< @@ -1667,31 +1681,77 @@ public: { if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { - if constexpr (is_division) + if constexpr (is_div_int || is_div_int_or_zero) { - if (context->getSettingsRef().decimal_check_overflow) - { - /// Check overflow by using operands scale (based on big decimal division implementation details): - /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers - /// i.e. int_operand = decimal_operand*10^scale - /// For division, left operand will be scaled by right operand scale also to do big integer division, - /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale - /// So, we can check upfront possible overflow just by checking max scale used for left operand - /// Note: it doesn't detect all possible overflow during big decimal division - if (left.getScale() + right.getScale() > ResultDataType::maxPrecision()) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); - } + if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + { + if constexpr (is_division) + { + if (context->getSettingsRef().decimal_check_overflow) + { + /// Check overflow by using operands scale (based on big decimal division implementation details): + /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers + /// i.e. 
int_operand = decimal_operand*10^scale + /// For division, left operand will be scaled by right operand scale also to do big integer division, + /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale + /// So, we can check upfront possible overflow just by checking max scale used for left operand + /// Note: it doesn't detect all possible overflow during big decimal division + if (left.getScale() + right.getScale() > ResultDataType::maxPrecision()) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); + } + } + ResultDataType result_type = decimalResultType(left, right); + type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } - ResultDataType result_type = decimalResultType(left, right); - type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } else if constexpr ((IsDataTypeDecimal && IsFloatingPoint) || (IsDataTypeDecimal && IsFloatingPoint)) type_res = std::make_shared(); else if constexpr (IsDataTypeDecimal) - type_res = std::make_shared(left.getPrecision(), left.getScale()); + { + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + type_res = std::make_shared(); + else if constexpr (is_div_int || is_div_int_or_zero) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + type_res = std::make_shared(left.getPrecision(), left.getScale()); + } else if constexpr (IsDataTypeDecimal) - type_res = std::make_shared(right.getPrecision(), right.getScale()); + { + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + type_res = std::make_shared(); + else if constexpr (is_div_int || is_div_int_or_zero) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + type_res = std::make_shared(right.getPrecision(), right.getScale()); + } else if constexpr (std::is_same_v) { // Special case for DateTime: binary OPS should reuse timezone @@ -2009,8 +2069,10 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A constexpr bool decimal_with_float = (IsDataTypeDecimal && IsFloatingPoint) || (IsFloatingPoint && IsDataTypeDecimal); - using T0 = std::conditional_t; - using T1 = std::conditional_t; + constexpr bool is_div_int_with_decimal = (is_div_int || is_div_int_or_zero) && (IsDataTypeDecimal || IsDataTypeDecimal); + + using T0 = std::conditional_t>; + using T1 = std::conditional_t>; using ResultType = typename ResultDataType::FieldType; using ColVecT0 = ColumnVectorOrDecimal; using ColVecT1 = ColumnVectorOrDecimal; @@ -2026,6 +2088,12 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A left_col = castColumn(arguments[0], converted_type); right_col = castColumn(arguments[1], converted_type); } + else if constexpr (is_div_int_with_decimal) + { + const auto converted_type = std::make_shared(); + left_col = castColumn(arguments[0], converted_type); + right_col = castColumn(arguments[1], converted_type); + } else { left_col = arguments[0].column; diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index 8ea53c865ce..b36530591ef 100644 --- a/src/Functions/IsOperation.h +++ 
b/src/Functions/IsOperation.h @@ -62,7 +62,9 @@ struct IsOperation static constexpr bool division = div_floating || div_int || div_int_or_zero || modulo; - static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest; + static constexpr bool division_allow_decimal = div_floating || modulo; + + static constexpr bool allow_decimal = plus || minus || multiply || division_allow_decimal || least || greatest; }; } diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference new file mode 100644 index 00000000000..9c1faab21d7 --- /dev/null +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference @@ -0,0 +1,52 @@ +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql new file mode 100644 index 00000000000..8fc4b5a9a7d --- /dev/null +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql @@ -0,0 +1,54 @@ +--intDiv-- +SELECT intDiv(4,2); +SELECT intDiv(toDecimal32(4.4, 2), 2); +SELECT intDiv(4, toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), 2); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 3), 2); +SELECT intDiv(toDecimal64(4.4, 3), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 4), 2); +SELECT intDiv(toDecimal128(4.4, 4), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 5), 2); +SELECT intDiv(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); +SELECT intDiv(4, toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(4, toDecimal128(2.2, 3)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal128(2.2, 3)); +SELECT intDiv(4, toDecimal256(2.2, 4)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal256(2.2, 4)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +--intDivOrZero-- +SELECT intDivOrZero(4,2); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2); +SELECT intDivOrZero(4, toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 3), 2); +SELECT intDivOrZero(toDecimal64(4.4, 3), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 4), 2); +SELECT intDivOrZero(toDecimal128(4.4, 4), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 5), 2); +SELECT intDivOrZero(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); +SELECT intDivOrZero(4, toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(4, toDecimal128(2.2, 3)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal128(2.2, 3)); +SELECT intDivOrZero(4, toDecimal256(2.2, 4)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal256(2.2, 4)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal64(2.2, 2)); 
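-- A minimal illustrative sketch, assuming the Decimal-to-integer result mapping added in
-- FunctionBinaryArithmetic.h above (Decimal32 -> Int32, Decimal64 -> Int64, Decimal128 -> Int128,
-- Decimal256 -> Int256, with the wider of the two operands winning):
-- SELECT toTypeName(intDiv(toDecimal32(4.4, 2), toDecimal32(2.2, 2))); -- likely Int32
-- SELECT toTypeName(intDiv(toDecimal256(4.4, 5), 2)); -- likely Int256
-- Every query in this test divides roughly 4.4 by 2.2 (or 4 by 2), which is why each line of the reference file is 2.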
+SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); From 918614970b8bbf0938e2ee5769d9891ec6cfecbf Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 26 Jan 2024 02:02:03 +0100 Subject: [PATCH 146/884] Fix style --- tests/integration/test_storage_hdfs/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 165dfb212b7..121263fb622 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -599,9 +599,7 @@ def test_schema_inference_with_globs(started_cluster): f"desc hdfs('hdfs://hdfs1:9000/data*.jsoncompacteachrow') settings schema_inference_use_cache_for_hdfs=0, input_format_json_infer_incomplete_types_as_strings=0" ) - assert ( - "CANNOT_EXTRACT_TABLE_STRUCTURE" in result - ) + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in result def test_insert_select_schema_inference(started_cluster): From 13c86248719eabd22856e5af7161f2f7547fdd8e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 26 Jan 2024 02:03:12 +0100 Subject: [PATCH 147/884] Fix style --- tests/integration/test_storage_s3/test.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 0b5e9462860..dbbe670e8ca 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1379,9 +1379,7 @@ def test_schema_inference_from_globs(started_cluster): f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test*.jsoncompacteachrow') settings schema_inference_use_cache_for_s3=0, input_format_json_infer_incomplete_types_as_strings=0" ) - assert ( - "CANNOT_EXTRACT_TABLE_STRUCTURE" in result - ) + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in result url_filename = "test{0,1,2,3}.jsoncompacteachrow" @@ -1389,9 +1387,7 @@ def test_schema_inference_from_globs(started_cluster): f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}') settings schema_inference_use_cache_for_url=0, input_format_json_infer_incomplete_types_as_strings=0" ) - assert ( - "CANNOT_EXTRACT_TABLE_STRUCTURE" in result - ) + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in result def test_signatures(started_cluster): From 692d37306eda131578a3c9df77c9c2c7e46fb231 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 26 Jan 2024 15:50:16 +0100 Subject: [PATCH 148/884] Fix build --- src/Storages/StorageURLCluster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 7b38048b384..d0df74d7521 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -44,7 +44,7 @@ StorageURLCluster::StorageURLCluster( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const StorageURL::Configuration & configuration_) - : 
IStorageCluster(cluster_name_, table_id_, getLogger("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageURLCluster (" + table_id_.table_name + ")")) , uri(uri_), format_name(format_) { context->getRemoteHostFilter().checkURL(Poco::URI(uri)); From bade45d197884812886fd6eedd85883f5d67fa50 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 27 Jan 2024 21:30:49 +0100 Subject: [PATCH 149/884] impl --- contrib/aws | 2 +- contrib/aws-c-auth | 2 +- contrib/aws-c-cal | 2 +- contrib/aws-c-common | 2 +- contrib/aws-c-compression | 2 +- contrib/aws-c-event-stream | 2 +- contrib/aws-c-http | 2 +- contrib/aws-c-io | 2 +- contrib/aws-c-mqtt | 2 +- contrib/aws-c-s3 | 2 +- contrib/aws-c-sdkutils | 2 +- contrib/aws-checksums | 2 +- contrib/aws-cmake/CMakeLists.txt | 11 ++++++++++- contrib/aws-crt-cpp | 2 +- contrib/aws-s2n-tls | 2 +- 15 files changed, 24 insertions(+), 15 deletions(-) diff --git a/contrib/aws b/contrib/aws index ca02358dcc7..4ec215f3607 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3 +Subproject commit 4ec215f3607c2111bf2cc91ba842046a6b5eb0c4 diff --git a/contrib/aws-c-auth b/contrib/aws-c-auth index 97133a2b5db..baeffa791d9 160000 --- a/contrib/aws-c-auth +++ b/contrib/aws-c-auth @@ -1 +1 @@ -Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12 +Subproject commit baeffa791d9d1cf61460662a6d9ac2186aaf05df diff --git a/contrib/aws-c-cal b/contrib/aws-c-cal index 85dd7664b78..9453687ff54 160000 --- a/contrib/aws-c-cal +++ b/contrib/aws-c-cal @@ -1 +1 @@ -Subproject commit 85dd7664b786a389c6fb1a6f031ab4bb2282133d +Subproject commit 9453687ff5493ba94eaccf8851200565c4364c77 diff --git a/contrib/aws-c-common b/contrib/aws-c-common index 45dcb2849c8..80f21b3cac5 160000 --- a/contrib/aws-c-common +++ b/contrib/aws-c-common @@ -1 +1 @@ -Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff +Subproject commit 80f21b3cac5ac51c6b8a62c7d2a5ef58a75195ee diff --git a/contrib/aws-c-compression b/contrib/aws-c-compression index b517b7decd0..99ec79ee297 160000 --- a/contrib/aws-c-compression +++ b/contrib/aws-c-compression @@ -1 +1 @@ -Subproject commit b517b7decd0dac30be2162f5186c250221c53aff +Subproject commit 99ec79ee2970f1a045d4ced1501b97ee521f2f85 diff --git a/contrib/aws-c-event-stream b/contrib/aws-c-event-stream index 2f9b60c42f9..08f24e384e5 160000 --- a/contrib/aws-c-event-stream +++ b/contrib/aws-c-event-stream @@ -1 +1 @@ -Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d +Subproject commit 08f24e384e5be20bcffa42b49213d24dad7881ae diff --git a/contrib/aws-c-http b/contrib/aws-c-http index dd344619879..a082f8a2067 160000 --- a/contrib/aws-c-http +++ b/contrib/aws-c-http @@ -1 +1 @@ -Subproject commit dd34461987947672444d0bc872c5a733dfdb9711 +Subproject commit a082f8a2067e4a31db73f1d4ffd702a8dc0f7089 diff --git a/contrib/aws-c-io b/contrib/aws-c-io index d58ed4f272b..11ce3c750a1 160000 --- a/contrib/aws-c-io +++ b/contrib/aws-c-io @@ -1 +1 @@ -Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89 +Subproject commit 11ce3c750a1dac7b04069fc5bff89e97e91bad4d diff --git a/contrib/aws-c-mqtt b/contrib/aws-c-mqtt index 33c3455cec8..6d36cd37262 160000 --- a/contrib/aws-c-mqtt +++ b/contrib/aws-c-mqtt @@ -1 +1 @@ -Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194 +Subproject commit 6d36cd3726233cb757468d0ea26f6cd8dad151ec diff --git a/contrib/aws-c-s3 b/contrib/aws-c-s3 index d7bfe602d69..de36fee8fe7 
160000 --- a/contrib/aws-c-s3 +++ b/contrib/aws-c-s3 @@ -1 +1 @@ -Subproject commit d7bfe602d6925948f1fff95784e3613cca6a3900 +Subproject commit de36fee8fe7ab02f10987877ae94a805bf440c1f diff --git a/contrib/aws-c-sdkutils b/contrib/aws-c-sdkutils index 208a701fa01..fd8c0ba2e23 160000 --- a/contrib/aws-c-sdkutils +++ b/contrib/aws-c-sdkutils @@ -1 +1 @@ -Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972 +Subproject commit fd8c0ba2e233997eaaefe82fb818b8b444b956d3 diff --git a/contrib/aws-checksums b/contrib/aws-checksums index ad53be196a2..321b805559c 160000 --- a/contrib/aws-checksums +++ b/contrib/aws-checksums @@ -1 +1 @@ -Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0 +Subproject commit 321b805559c8e911be5bddba13fcbd222a3e2d3a diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index 950a0e06cd0..b913908911c 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -35,6 +35,8 @@ set(AWS_PUBLIC_COMPILE_DEFS) set(AWS_PRIVATE_COMPILE_DEFS) set(AWS_PRIVATE_LIBS) +list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DINTEL_NO_ITTNOTIFY_API") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DDEBUG_BUILD") endif() @@ -85,14 +87,20 @@ file(GLOB AWS_SDK_CORE_SRC "${AWS_SDK_CORE_DIR}/source/external/cjson/*.cpp" "${AWS_SDK_CORE_DIR}/source/external/tinyxml2/*.cpp" "${AWS_SDK_CORE_DIR}/source/http/*.cpp" + "${AWS_SDK_CORE_DIR}/source/http/crt/*.cpp" "${AWS_SDK_CORE_DIR}/source/http/standard/*.cpp" "${AWS_SDK_CORE_DIR}/source/internal/*.cpp" "${AWS_SDK_CORE_DIR}/source/monitoring/*.cpp" + "${AWS_SDK_CORE_DIR}/source/net/*.cpp" + "${AWS_SDK_CORE_DIR}/source/net/linux-shared/*.cpp" + "${AWS_SDK_CORE_DIR}/source/platform/linux-shared/*.cpp" + "${AWS_SDK_CORE_DIR}/source/smithy/tracing/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/base64/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/component-registry/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/crypto/*.cpp" - "${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/crypto/factory/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/event/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/json/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/logging/*.cpp" @@ -176,6 +184,7 @@ file(GLOB AWS_COMMON_SRC "${AWS_COMMON_DIR}/source/*.c" "${AWS_COMMON_DIR}/source/external/*.c" "${AWS_COMMON_DIR}/source/posix/*.c" + "${AWS_COMMON_DIR}/source/linux/*.c" ) file(GLOB AWS_COMMON_ARCH_SRC diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index 8a301b7e842..86adce22528 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit 8a301b7e842f1daed478090c869207300972379f +Subproject commit 86adce22528b811efa5ca27f65d8d5a38223cbfa diff --git a/contrib/aws-s2n-tls b/contrib/aws-s2n-tls index 71f4794b758..9a1e7545402 160000 --- a/contrib/aws-s2n-tls +++ b/contrib/aws-s2n-tls @@ -1 +1 @@ -Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4 +Subproject commit 9a1e75454023e952b366ce1eab9c54007250119f From 37823a7b91a7bafc05289ec9e4ed1f4448146219 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 27 Jan 2024 23:45:14 +0100 Subject: [PATCH 150/884] use upstream repo --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 68016bf8c5b..a618104f364 100644 --- a/.gitmodules +++ b/.gitmodules @@ -99,7 +99,7 @@ url = https://github.com/awslabs/aws-c-event-stream [submodule "aws-c-common"] 
path = contrib/aws-c-common - url = https://github.com/ClickHouse/aws-c-common + url = https://github.com/awslabs/aws-c-common.git [submodule "aws-checksums"] path = contrib/aws-checksums url = https://github.com/awslabs/aws-checksums From d264a5a148c577ab046dc4bbef50b5a4e0c32db9 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 28 Jan 2024 12:06:52 +0100 Subject: [PATCH 151/884] Update client & settings to std::shared_ptr --- src/Backups/BackupIO_AzureBlobStorage.cpp | 6 ++-- src/Backups/BackupIO_AzureBlobStorage.h | 8 +++--- .../AzureBlobStorage/AzureObjectStorage.h | 6 ++-- .../Cached/CachedObjectStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- .../copyAzureBlobStorageFile.cpp | 28 +++++++++---------- .../copyAzureBlobStorageFile.h | 10 +++---- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 1b4c10ad0cb..d99f296cca1 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -89,7 +89,7 @@ std::unique_ptr BackupReaderAzureBlobStorage::readFile(const key = file_name; } return std::make_unique( - client.get(), key, read_settings, settings.get()->max_single_read_retries, + client, key, read_settings, settings.get()->max_single_read_retries, settings.get()->max_single_download_retries); } @@ -262,7 +262,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String } return std::make_unique( - client.get(), key, read_settings, settings.get()->max_single_read_retries, + client, key, read_settings, settings.get()->max_single_read_retries, settings.get()->max_single_download_retries); } @@ -278,7 +278,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin key = file_name; } return std::make_unique( - client.get(), + client, key, settings.get()->max_single_part_upload_size, DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 87dc470cdb3..95325044a62 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -28,10 +28,10 @@ public: private: const DataSourceDescription data_source_description; - MultiVersion client; + std::shared_ptr client; StorageAzureBlob::Configuration configuration; std::unique_ptr object_storage; - MultiVersion settings; + std::shared_ptr settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault @@ -57,10 +57,10 @@ private: std::unique_ptr readFile(const String & file_name, size_t expected_file_size) override; void removeFilesBatch(const Strings & file_names); const DataSourceDescription data_source_description; - MultiVersion client; + std::shared_ptr client; StorageAzureBlob::Configuration configuration; std::unique_ptr object_storage; - MultiVersion settings; + std::shared_ptr settings; }; } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 30fedb601dc..0ae12fb205f 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -137,11 +137,11 @@ public: bool isRemote() const override { return true; } - MultiVersion & getSettings() { return settings; } + std::shared_ptr getSettings() { return settings.get(); } - MultiVersion & getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() override { - return client; + return 
client.get(); } private: diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 2ed8990515f..1f293e5857e 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -122,7 +122,7 @@ public: static bool canUseReadThroughCache(const ReadSettings & settings); #if USE_AZURE_BLOB_STORAGE - MultiVersion & getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() override { return object_storage->getAzureBlobStorageClient(); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index e066beaefcc..049935ad60c 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -226,7 +226,7 @@ public: virtual WriteSettings patchSettings(const WriteSettings & write_settings) const; #if USE_AZURE_BLOB_STORAGE - virtual MultiVersion & getAzureBlobStorageClient() + virtual std::shared_ptr getAzureBlobStorageClient() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index e5517a1a021..537a5a191e7 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -44,12 +44,12 @@ namespace public: UploadHelper( const CreateReadBuffer & create_read_buffer_, - MultiVersion & client_, + std::shared_ptr client_, size_t offset_, size_t total_size_, const String & dest_container_for_logging_, const String & dest_blob_, - MultiVersion settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunner schedule_, bool for_disk_azure_blob_storage_, const Poco::Logger * log_) @@ -71,12 +71,12 @@ namespace protected: std::function()> create_read_buffer; - MultiVersion & client; + std::shared_ptr client; size_t offset; size_t total_size; const String & dest_container_for_logging; const String & dest_blob; - MultiVersion settings; + std::shared_ptr settings; ThreadPoolCallbackRunner schedule; bool for_disk_azure_blob_storage; const Poco::Logger * log; @@ -116,7 +116,7 @@ namespace void completeMultipartUpload() { - auto block_blob_client = client.get()->GetBlockBlobClient(dest_blob); + auto block_blob_client = client->GetBlockBlobClient(dest_blob); block_blob_client.CommitBlockList(block_ids); } @@ -222,7 +222,7 @@ namespace if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); - auto block_blob_client = client.get()->GetBlockBlobClient(dest_blob); + auto block_blob_client = client->GetBlockBlobClient(dest_blob); while (!task.read_buffer->eof()) { @@ -267,10 +267,10 @@ void copyDataToAzureBlobStorageFile( const std::function()> & create_read_buffer, size_t offset, size_t size, - MultiVersion & dest_client, + std::shared_ptr dest_client, const String & dest_container_for_logging, const String & dest_blob, - MultiVersion settings, + std::shared_ptr settings, ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) { @@ -280,15 +280,15 @@ void copyDataToAzureBlobStorageFile( void copyAzureBlobStorageFile( - MultiVersion & src_client, - MultiVersion & dest_client, + std::shared_ptr src_client, + std::shared_ptr dest_client, const String & src_container_for_logging, const String & src_blob, size_t offset, size_t size, const String & dest_container_for_logging, const String & dest_blob, - 
MultiVersion settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunner schedule, bool for_disk_azure_blob_storage) @@ -300,8 +300,8 @@ void copyAzureBlobStorageFile( if (for_disk_azure_blob_storage) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); - auto block_blob_client_src = src_client.get()->GetBlockBlobClient(src_blob); - auto block_blob_client_dest = dest_client.get()->GetBlockBlobClient(dest_blob); + auto block_blob_client_src = src_client->GetBlockBlobClient(src_blob); + auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob); auto source_uri = block_blob_client_src.GetUrl(); if (size < max_single_operation_copy_size) @@ -328,7 +328,7 @@ void copyAzureBlobStorageFile( LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); auto create_read_buffer = [&] { - return std::make_unique(src_client.get(), src_blob, read_settings, settings.get()->max_single_read_retries, + return std::make_unique(src_client, src_blob, read_settings, settings.get()->max_single_read_retries, settings.get()->max_single_download_retries); }; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 15a31031f63..83814f42693 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -21,15 +21,15 @@ using CreateReadBuffer = std::function()>; /// Copies a file from AzureBlobStorage to AzureBlobStorage. /// The parameters `src_offset` and `src_size` specify a part in the source to copy. void copyAzureBlobStorageFile( - MultiVersion & src_client, - MultiVersion & dest_client, + std::shared_ptr src_client, + std::shared_ptr dest_client, const String & src_container_for_logging, const String & src_blob, size_t src_offset, size_t src_size, const String & dest_container_for_logging, const String & dest_blob, - MultiVersion settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_azure_blob_storage = false); @@ -44,10 +44,10 @@ void copyDataToAzureBlobStorageFile( const std::function()> & create_read_buffer, size_t offset, size_t size, - MultiVersion & client, + std::shared_ptr client, const String & dest_container_for_logging, const String & dest_blob, - MultiVersion settings, + std::shared_ptr settings, ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_azure_blob_storage = false); From b54be00783c38786370bce20930e626adc8fb3a1 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 28 Jan 2024 14:36:22 +0100 Subject: [PATCH 152/884] fix build --- contrib/aws-cmake/CMakeLists.txt | 6 +++--- contrib/aws-crt-cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index b913908911c..abde20addaf 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -25,6 +25,7 @@ include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsFeatureTests.cmake") include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadAffinity.cmake") include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadName.cmake") include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsSIMD.cmake") +include("${ClickHouse_SOURCE_DIR}/contrib/aws-crt-cpp/cmake/AwsGetVersion.cmake") # Gather sources and options. 
@@ -123,9 +124,8 @@ OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF) configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) -list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1") -list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10") -list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36") +aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH) +configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY) list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index 86adce22528..f532d6abc0d 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit 86adce22528b811efa5ca27f65d8d5a38223cbfa +Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 From 368a26a2aac14da37914b5a25f9537008e48d349 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 28 Jan 2024 15:48:49 +0100 Subject: [PATCH 153/884] fix build --- contrib/update-submodules.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/update-submodules.sh b/contrib/update-submodules.sh index 7195de020bd..072d7a5dc2f 100755 --- a/contrib/update-submodules.sh +++ b/contrib/update-submodules.sh @@ -24,7 +24,7 @@ git config --file .gitmodules --get-regexp '.*path' | sed 's/[^ ]* //' | xargs - # We don't want to depend on any third-party CMake files. # To check it, find and delete them. grep -o -P '"contrib/[^"]+"' .gitmodules | - grep -v -P 'contrib/(llvm-project|google-protobuf|grpc|abseil-cpp|corrosion)' | + grep -v -P 'contrib/(llvm-project|google-protobuf|grpc|abseil-cpp|corrosion|aws-crt-cpp)' | xargs -I@ find @ \ -'(' -name 'CMakeLists.txt' -or -name '*.cmake' -')' -and -not -name '*.h.cmake' \ -delete From ba85642453915dd57c0cba256b35bf8bec390ea5 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 28 Jan 2024 20:26:55 +0000 Subject: [PATCH 154/884] split ISlotControl from ConcurrencyControl --- programs/server/Server.cpp | 2 +- src/Common/ConcurrencyControl.cpp | 28 ++++--- src/Common/ConcurrencyControl.h | 36 ++++----- src/Common/ISlotControl.h | 76 +++++++++++++++++++ .../tests/gtest_concurrency_control.cpp | 28 +++---- src/Processors/Executors/PipelineExecutor.cpp | 14 ++-- src/Processors/Executors/PipelineExecutor.h | 4 +- 7 files changed, 132 insertions(+), 56 deletions(-) create mode 100644 src/Common/ISlotControl.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 75ec574c357..d6bee995ca4 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1366,7 +1366,7 @@ try global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); - ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited; + SlotCount concurrent_threads_soft_limit = UnlimitedSlots; if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num; if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0) diff --git a/src/Common/ConcurrencyControl.cpp b/src/Common/ConcurrencyControl.cpp index c9fe51550dc..0893cfce955 
100644 --- a/src/Common/ConcurrencyControl.cpp +++ b/src/Common/ConcurrencyControl.cpp @@ -12,10 +12,10 @@ namespace ErrorCodes ConcurrencyControl::Slot::~Slot() { - allocation->release(); + static_cast(*allocation).release(); } -ConcurrencyControl::Slot::Slot(AllocationPtr && allocation_) +ConcurrencyControl::Slot::Slot(SlotAllocationPtr && allocation_) : allocation(std::move(allocation_)) { } @@ -27,7 +27,7 @@ ConcurrencyControl::Allocation::~Allocation() parent.free(this); } -[[nodiscard]] ConcurrencyControl::SlotPtr ConcurrencyControl::Allocation::tryAcquire() +[[nodiscard]] AcquiredSlotPtr ConcurrencyControl::Allocation::tryAcquire() { SlotCount value = granted.load(); while (value) @@ -35,15 +35,21 @@ ConcurrencyControl::Allocation::~Allocation() if (granted.compare_exchange_strong(value, value - 1)) { std::unique_lock lock{mutex}; - return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor + return AcquiredSlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor } } return {}; // avoid unnecessary locking } -ConcurrencyControl::SlotCount ConcurrencyControl::Allocation::grantedCount() const +SlotCount ConcurrencyControl::Allocation::grantedCount() const { - return granted; + return granted.load(); +} + +SlotCount ConcurrencyControl::Allocation::allocatedCount() const +{ + std::unique_lock lock{mutex}; + return allocated; } ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_) @@ -87,7 +93,7 @@ ConcurrencyControl::~ConcurrencyControl() abort(); } -[[nodiscard]] ConcurrencyControl::AllocationPtr ConcurrencyControl::allocate(SlotCount min, SlotCount max) +[[nodiscard]] SlotAllocationPtr ConcurrencyControl::allocate(SlotCount min, SlotCount max) { if (min > max) throw Exception(ErrorCodes::LOGICAL_ERROR, "ConcurrencyControl: invalid allocation requirements"); @@ -100,13 +106,13 @@ ConcurrencyControl::~ConcurrencyControl() // Create allocation and start waiting if more slots are required if (granted < max) - return AllocationPtr(new Allocation(*this, max, granted, + return SlotAllocationPtr(new Allocation(*this, max, granted, waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */))); else - return AllocationPtr(new Allocation(*this, max, granted)); + return SlotAllocationPtr(new Allocation(*this, max, granted)); } -void ConcurrencyControl::setMaxConcurrency(ConcurrencyControl::SlotCount value) +void ConcurrencyControl::setMaxConcurrency(SlotCount value) { std::unique_lock lock{mutex}; max_concurrency = std::max(1, value); // never allow max_concurrency to be zero @@ -162,7 +168,7 @@ void ConcurrencyControl::schedule(std::unique_lock &) } } -ConcurrencyControl::SlotCount ConcurrencyControl::available(std::unique_lock &) const +SlotCount ConcurrencyControl::available(std::unique_lock &) const { if (cur_concurrency < max_concurrency) return max_concurrency - cur_concurrency; diff --git a/src/Common/ConcurrencyControl.h b/src/Common/ConcurrencyControl.h index 7e20384aa2a..ba94502962c 100644 --- a/src/Common/ConcurrencyControl.h +++ b/src/Common/ConcurrencyControl.h @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -34,41 +35,35 @@ namespace DB * Oversubscription is possible: total amount of allocated slots can exceed `setMaxConcurrency(limit)` * because `min` amount of slots is allocated for each query unconditionally. 
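 * A worked example (illustrative; the numbers simply follow the allocate() logic shown in this diff):
 * with setMaxConcurrency(10) and three queries each calling allocate(min = 1, max = 6), the grants are
 * 6, then max(1, min(6, 4)) = 4, then max(1, min(6, 0)) = 1, for 11 slots in total, which demonstrates
 * oversubscription; the last two allocations stay in the waiters list and are granted more slots
 * round-robin as the first query releases its acquired slots.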
*/ -class ConcurrencyControl : boost::noncopyable +class ConcurrencyControl : public ISlotControl { public: struct Allocation; - using AllocationPtr = std::shared_ptr; - using SlotCount = UInt64; using Waiters = std::list; - static constexpr SlotCount Unlimited = std::numeric_limits::max(); - // Scoped guard for acquired slot, see Allocation::tryAcquire() - struct Slot : boost::noncopyable + struct Slot : public IAcquiredSlot { - ~Slot(); + ~Slot() override; private: friend struct Allocation; // for ctor - explicit Slot(AllocationPtr && allocation_); + explicit Slot(SlotAllocationPtr && allocation_); - AllocationPtr allocation; + SlotAllocationPtr allocation; }; - // FIXME: have to be unique_ptr, but ThreadFromGlobalPool does not support move semantics yet - using SlotPtr = std::shared_ptr; - // Manages group of slots for a single query, see ConcurrencyControl::allocate(min, max) - struct Allocation : std::enable_shared_from_this, boost::noncopyable + struct Allocation : public ISlotAllocation { - ~Allocation(); + ~Allocation() override; // Take one already granted slot if available. Lock-free iff there is no granted slot. - [[nodiscard]] SlotPtr tryAcquire(); + [[nodiscard]] AcquiredSlotPtr tryAcquire() override; - SlotCount grantedCount() const; + SlotCount grantedCount() const override; + SlotCount allocatedCount() const override; private: friend struct Slot; // for release() @@ -94,7 +89,7 @@ public: ConcurrencyControl & parent; const SlotCount limit; - std::mutex mutex; // the following values must be accessed under this mutex + mutable std::mutex mutex; // the following values must be accessed under this mutex SlotCount allocated; // allocated total (including already `released`) SlotCount released = 0; @@ -103,17 +98,16 @@ public: const Waiters::iterator waiter; // iterator to itself in Waiters list; valid iff allocated < limit }; -public: ConcurrencyControl(); // WARNING: all Allocation objects MUST be destructed before ConcurrencyControl // NOTE: Recommended way to achieve this is to use `instance()` and do graceful shutdown of queries - ~ConcurrencyControl(); + ~ConcurrencyControl() override; // Allocate at least `min` and at most `max` slots. // If not all `max` slots were successfully allocated, a subscription for later allocation is created // Use `Allocation::tryAcquire()` to acquire allocated slot, before running a thread. - [[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max); + [[nodiscard]] SlotAllocationPtr allocate(SlotCount min, SlotCount max) override; void setMaxConcurrency(SlotCount value); @@ -134,7 +128,7 @@ private: std::mutex mutex; Waiters waiters; Waiters::iterator cur_waiter; // round-robin pointer - SlotCount max_concurrency = Unlimited; + SlotCount max_concurrency = UnlimitedSlots; SlotCount cur_concurrency = 0; }; diff --git a/src/Common/ISlotControl.h b/src/Common/ISlotControl.h new file mode 100644 index 00000000000..add19f0cc0c --- /dev/null +++ b/src/Common/ISlotControl.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +// Interfaces for abstract "slot" allocation and control. +// Slot is a virtual entity existing in a limited amount (CPUs or memory chunks, etc). +// +// Every slot can be in one of the following states: +// * free: slot is available to be allocated. +// * allocated: slot is allocated to a specific ISlotAllocation. +// +// Allocated slots can be considered as: +// * granted: allocated, but not yet acquired. +// * acquired: acquired using IAcquiredSlot. 
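// A rough usage sketch (illustrative only; it mirrors how ConcurrencyControl and PipelineExecutor in this
// patch use these interfaces, and is not an exact API contract; num_threads is a placeholder):
//
//     SlotAllocationPtr slots = ConcurrencyControl::instance().allocate(/*min*/ 1, /*max*/ num_threads);
//     std::vector<AcquiredSlotPtr> acquired;
//     while (AcquiredSlotPtr slot = slots->tryAcquire())   // returns an empty pointer when nothing is granted
//         acquired.push_back(std::move(slot));             // hold the slot for the lifetime of a worker thread
//     // destroying an AcquiredSlotPtr releases the slot; waiting allocations may then be granted more slots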
+// +// Example for CPU (see ConcurrencyControl.h). Every slot represents one CPU in the system. +// Slot allocation is a request to allocate specific number of CPUs for a specific query. +// Acquired slot is an entity that is held by a thread as long as it is running. This allows +// total number of threads in the system to be limited and the distribution process to be controlled. +// +// TODO: +// - for preemption - ability to return granted slot back and reacquire it later. +// - for memory allocations - variable size of slots (in bytes). + +/// Number of slots +using SlotCount = UInt64; + +/// Unlimited number of slots +constexpr SlotCount UnlimitedSlots = std::numeric_limits::max(); + +/// Acquired slot holder. Slot is considered to be acquired as long the object exists. +class IAcquiredSlot : public std::enable_shared_from_this, boost::noncopyable +{ +public: + virtual ~IAcquiredSlot() = default; +}; + +using AcquiredSlotPtr = std::shared_ptr; + +/// Request for allocation of slots from ISlotControl. +/// Allows for more slots to be acquired and the whole request to be canceled. +class ISlotAllocation : public std::enable_shared_from_this, boost::noncopyable +{ +public: + virtual ~ISlotAllocation() = default; + + /// Take one already granted slot if available. + [[nodiscard]] virtual AcquiredSlotPtr tryAcquire() = 0; + + /// Returns the number of granted slots for given allocation (i.e. available to be acquired) + virtual SlotCount grantedCount() const = 0; + + /// Returns the total number of slots allocated at the moment (acquired and granted) + virtual SlotCount allocatedCount() const = 0; +}; + +using SlotAllocationPtr = std::shared_ptr; + +class ISlotControl : boost::noncopyable +{ +public: + virtual ~ISlotControl() = default; + + // Allocate at least `min` and at most `max` slots. 
+ // If not all `max` slots were successfully allocated, a "subscription" for later allocation is created + [[nodiscard]] virtual SlotAllocationPtr allocate(SlotCount min, SlotCount max) = 0; +}; + +} diff --git a/src/Common/tests/gtest_concurrency_control.cpp b/src/Common/tests/gtest_concurrency_control.cpp index 8e5b89a72a0..5e579317ade 100644 --- a/src/Common/tests/gtest_concurrency_control.cpp +++ b/src/Common/tests/gtest_concurrency_control.cpp @@ -15,7 +15,7 @@ struct ConcurrencyControlTest { ConcurrencyControl cc; - explicit ConcurrencyControlTest(ConcurrencyControl::SlotCount limit = ConcurrencyControl::Unlimited) + explicit ConcurrencyControlTest(SlotCount limit = UnlimitedSlots) { cc.setMaxConcurrency(limit); } @@ -25,7 +25,7 @@ TEST(ConcurrencyControl, Unlimited) { ConcurrencyControlTest t; // unlimited number of slots auto slots = t.cc.allocate(0, 100500); - std::vector acquired; + std::vector acquired; while (auto slot = slots->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 100500); @@ -34,14 +34,14 @@ TEST(ConcurrencyControl, Unlimited) TEST(ConcurrencyControl, Fifo) { ConcurrencyControlTest t(1); // use single slot - std::vector allocations; + std::vector allocations; constexpr int count = 42; allocations.reserve(count); for (int i = 0; i < count; i++) allocations.emplace_back(t.cc.allocate(0, 1)); for (int i = 0; i < count; i++) { - ConcurrencyControl::SlotPtr holder; + AcquiredSlotPtr holder; for (int j = 0; j < count; j++) { auto slot = allocations[j]->tryAcquire(); @@ -60,11 +60,11 @@ TEST(ConcurrencyControl, Fifo) TEST(ConcurrencyControl, Oversubscription) { ConcurrencyControlTest t(10); - std::vector allocations; + std::vector allocations; allocations.reserve(10); for (int i = 0; i < 10; i++) allocations.emplace_back(t.cc.allocate(1, 2)); - std::vector slots; + std::vector slots; // Normal allocation using maximum amount of slots for (int i = 0; i < 5; i++) { @@ -90,7 +90,7 @@ TEST(ConcurrencyControl, ReleaseUnacquiredSlots) { ConcurrencyControlTest t(10); { - std::vector allocations; + std::vector allocations; allocations.reserve(10); for (int i = 0; i < 10; i++) allocations.emplace_back(t.cc.allocate(1, 2)); @@ -98,7 +98,7 @@ TEST(ConcurrencyControl, ReleaseUnacquiredSlots) } // Check that slots were actually released auto allocation = t.cc.allocate(0, 20); - std::vector acquired; + std::vector acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 10); @@ -110,7 +110,7 @@ TEST(ConcurrencyControl, DestroyNotFullyAllocatedAllocation) for (int i = 0; i < 3; i++) { auto allocation = t.cc.allocate(5, 20); - std::vector acquired; + std::vector acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 10); @@ -122,7 +122,7 @@ TEST(ConcurrencyControl, DestroyAllocationBeforeSlots) ConcurrencyControlTest t(10); for (int i = 0; i < 3; i++) { - std::vector acquired; + std::vector acquired; auto allocation = t.cc.allocate(5, 20); while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); @@ -135,7 +135,7 @@ TEST(ConcurrencyControl, GrantReleasedToTheSameAllocation) { ConcurrencyControlTest t(3); auto allocation = t.cc.allocate(0, 10); - std::list acquired; + std::list acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 3); // 0 1 2 @@ -183,7 +183,7 @@ TEST(ConcurrencyControl, SetSlotCount) { 
ConcurrencyControlTest t(10); auto allocation = t.cc.allocate(5, 30); - std::vector acquired; + std::vector acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 10); @@ -200,7 +200,7 @@ TEST(ConcurrencyControl, SetSlotCount) ASSERT_TRUE(acquired.size() == 5); // Check that newly added slots are equally distributed over waiting allocations - std::vector acquired2; + std::vector acquired2; auto allocation2 = t.cc.allocate(0, 30); ASSERT_TRUE(!allocation->tryAcquire()); t.cc.setMaxConcurrency(15); // 10 slots added: 5 to the first allocation and 5 to the second one @@ -224,7 +224,7 @@ TEST(ConcurrencyControl, MultipleThreads) auto run_query = [&] (size_t max_threads) { - ConcurrencyControl::AllocationPtr slots = t.cc.allocate(1, max_threads); + SlotAllocationPtr slots = t.cc.allocate(1, max_threads); std::mutex threads_mutex; std::vector threads; threads.reserve(max_threads); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 580aaa2b259..a06bacd7d3b 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -138,8 +138,8 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) initializeExecution(1, true); // Acquire slot until we are done - single_thread_slot = slots->tryAcquire(); - chassert(single_thread_slot && "Unable to allocate slot for the first thread, but we just allocated at least one slot"); + single_thread_cpu_slot = cpu_slots->tryAcquire(); + chassert(single_thread_cpu_slot && "Unable to allocate cpu slot for the first thread, but we just allocated at least one slot"); if (yield_flag && *yield_flag) return true; @@ -155,7 +155,7 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) if (node->exception) std::rethrow_exception(node->exception); - single_thread_slot.reset(); + single_thread_cpu_slot.reset(); finalizeExecution(); return false; @@ -333,8 +333,8 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ /// Allocate CPU slots from concurrency control size_t min_threads = concurrency_control ? 
1uz : num_threads; - slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); - use_threads = slots->grantedCount(); + cpu_slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); + use_threads = cpu_slots->grantedCount(); Queue queue; graph->initializeExecution(queue); @@ -348,7 +348,7 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ void PipelineExecutor::spawnThreads() { - while (auto slot = slots->tryAcquire()) + while (auto slot = cpu_slots->tryAcquire()) { size_t thread_num = threads.fetch_add(1); @@ -405,7 +405,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) } else { - auto slot = slots->tryAcquire(); + auto slot = cpu_slots->tryAcquire(); executeSingleThread(0); } diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 862a460f0ed..cb74b524163 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -68,8 +68,8 @@ private: ExecutorTasks tasks; /// Concurrency control related - ConcurrencyControl::AllocationPtr slots; - ConcurrencyControl::SlotPtr single_thread_slot; // slot for single-thread mode to work using executeStep() + SlotAllocationPtr cpu_slots; + AcquiredSlotPtr single_thread_cpu_slot; // cpu slot for single-thread mode to work using executeStep() std::unique_ptr pool; std::atomic_size_t threads = 0; From 151ade2318f38adc5b732423a1ee1d228e1e5966 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Sun, 28 Jan 2024 21:38:21 +0100 Subject: [PATCH 155/884] Update src/Common/ISlotControl.h --- src/Common/ISlotControl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ISlotControl.h b/src/Common/ISlotControl.h index add19f0cc0c..aa7414d5465 100644 --- a/src/Common/ISlotControl.h +++ b/src/Common/ISlotControl.h @@ -35,7 +35,7 @@ using SlotCount = UInt64; /// Unlimited number of slots constexpr SlotCount UnlimitedSlots = std::numeric_limits::max(); -/// Acquired slot holder. Slot is considered to be acquired as long the object exists. +/// Acquired slot holder. Slot is considered to be acquired as long as the object exists. class IAcquiredSlot : public std::enable_shared_from_this, boost::noncopyable { public: From 8798f469b3a0e38341e759f5cc98ca86b8220069 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jan 2024 22:27:21 +0100 Subject: [PATCH 156/884] Fix conflicts. 
--- tests/ci/ci_config.py | 47 +++++++------------------------------------ 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 0fc4b3505ce..8c8c45b877c 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -50,6 +50,7 @@ class JobNames(metaclass=WithIter): STATELESS_TEST_DEBUG = "Stateless tests (debug)" STATELESS_TEST_RELEASE = "Stateless tests (release)" + STATELESS_TEST_RELEASE_COVERAGE = "Stateless tests (coverage)" STATELESS_TEST_AARCH64 = "Stateless tests (aarch64)" STATELESS_TEST_ASAN = "Stateless tests (asan)" STATELESS_TEST_TSAN = "Stateless tests (tsan)" @@ -64,6 +65,7 @@ class JobNames(metaclass=WithIter): STATEFUL_TEST_DEBUG = "Stateful tests (debug)" STATEFUL_TEST_RELEASE = "Stateful tests (release)" + STATEFUL_TEST_RELEASE_COVERAGE = "Stateful tests (coverage)" STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)" STATEFUL_TEST_ASAN = "Stateful tests (asan)" STATEFUL_TEST_TSAN = "Stateful tests (tsan)" @@ -763,18 +765,6 @@ CI_CONFIG = CIConfig( builds_report_config={ JobNames.BUILD_CHECK: BuildReportConfig( builds=[ -<<<<<<< HEAD - "package_release", - "package_aarch64", - "package_asan", - "package_ubsan", - "package_tsan", - "package_msan", - "package_debug", - "package_release_coverage", - "binary_release", - "fuzzers", -======= Build.PACKAGE_RELEASE, Build.PACKAGE_AARCH64, Build.PACKAGE_ASAN, @@ -782,9 +772,9 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, Build.PACKAGE_MSAN, Build.PACKAGE_DEBUG, + Build.PACKAGE_RELEASE_COVERAGE, Build.BINARY_RELEASE, Build.FUZZERS, ->>>>>>> master ] ), JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig( @@ -872,33 +862,15 @@ CI_CONFIG = CIConfig( JobNames.STATEFUL_TEST_RELEASE: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), -<<<<<<< HEAD - "Stateful tests (coverage)": TestConfig( - "package_release_coverage", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_RELEASE_COVERAGE: TestConfig( + Build.PACKAGE_RELEASE_COVERAGE, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - "Stateful tests (release, DatabaseOrdinary)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - "Stateful tests (release, ParallelReplicas)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore -======= JobNames.STATEFUL_TEST_AARCH64: TestConfig( Build.PACKAGE_AARCH64, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - # FIXME: delete? 
- # "Stateful tests (release, DatabaseOrdinary)": TestConfig( - # Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore - # ), - # "Stateful tests (release, DatabaseReplicated)": TestConfig( - # Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore - # ), # Stateful tests for parallel replicas JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore ->>>>>>> master ), JobNames.STATEFUL_TEST_PARALLEL_REPL_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore @@ -939,16 +911,11 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_RELEASE: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore ), -<<<<<<< HEAD - "Stateless tests (coverage)": TestConfig( - "package_release_coverage", job_config=JobConfig(**statless_test_common_params) # type: ignore + JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( + Build.PACKAGE_RELEASE_COVERAGE, job_config=JobConfig(**statless_test_common_params) # type: ignore ), - "Stateless tests (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(**statless_test_common_params) # type: ignore -======= JobNames.STATELESS_TEST_AARCH64: TestConfig( Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore ->>>>>>> master ), JobNames.STATELESS_TEST_ANALYZER_RELEASE: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore From 1ef8062c7701c1788abda4d5da7ee56a5b9de372 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jan 2024 22:28:09 +0100 Subject: [PATCH 157/884] Split by batches --- tests/ci/ci_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8c8c45b877c..0cfddbe0435 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -912,7 +912,8 @@ CI_CONFIG = CIConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( - Build.PACKAGE_RELEASE_COVERAGE, job_config=JobConfig(**statless_test_common_params) # type: ignore + Build.PACKAGE_RELEASE_COVERAGE, + job_config=JobConfig(num_batches=6, **statless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_AARCH64: TestConfig( Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore From 982e3ddbddb0baaa2ac11f9dec74a8be8c8e8545 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jan 2024 23:11:03 +0100 Subject: [PATCH 158/884] Fix Python --- tests/ci/ci_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 0cfddbe0435..7458f25805f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -913,7 +913,7 @@ CI_CONFIG = CIConfig( ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( Build.PACKAGE_RELEASE_COVERAGE, - job_config=JobConfig(num_batches=6, **statless_test_common_params) # type: ignore + job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_AARCH64: TestConfig( Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore From 30f48e18938bbc5683d781f1cbfe7bfcf3fec8d9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jan 2024 23:54:35 +0100 Subject: [PATCH 
159/884] Use MergeTree as a default table engine --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e0b3ca39899..4460a365846 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -558,7 +558,7 @@ class IColumn; M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ - M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ + M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index dff0ebb759c..7bdab886934 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -100,6 +100,7 @@ static std::map sett {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"default_table_engine", DefaultTableEngine::None, DefaultTableEngine::MergeTree, "Set default table engine to MergeTree for better usability"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}}}, {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, From a22b68f46fec54f98fc3c3cb9a9c1f597bae7ffd Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 29 Jan 2024 10:49:36 +0100 Subject: [PATCH 160/884] Added setting azure_max_single_part_copy_size --- src/Core/Settings.h | 3 ++- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 4 +++- .../ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 3 +++ src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 6 +----- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 305d6466658..4ae5d1585f3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -82,7 +82,8 @@ class IColumn; M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ M(UInt64, 
s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. You ", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ - M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index 02b0d5bb599..9da84d430e4 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -7,6 +7,7 @@ #include #include #include +#include using namespace Azure::Storage::Blobs; @@ -157,7 +158,7 @@ std::unique_ptr getAzureBlobContainerClient( } } -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/) +std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { return std::make_unique( config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024), @@ -166,6 +167,7 @@ std::unique_ptr getAzureBlobStorageSettings(const Po config.getInt(config_prefix + ".max_single_download_retries", 3), config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024), + config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size), config.getBool(config_prefix + ".use_native_copy", false) ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 0ae12fb205f..18b1a70defe 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -25,6 +25,7 @@ struct AzureObjectStorageSettings int max_single_download_retries_, int list_object_keys_size_, size_t max_upload_part_size_, + size_t max_single_part_copy_size_, bool use_native_copy_) : max_single_part_upload_size(max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) @@ -32,6 +33,7 @@ struct AzureObjectStorageSettings , max_single_download_retries(max_single_download_retries_) , list_object_keys_size(list_object_keys_size_) , max_upload_part_size(max_upload_part_size_) + , max_single_part_copy_size(max_single_part_copy_size_) , use_native_copy(use_native_copy_) { } @@ -46,6 +48,7 @@ struct AzureObjectStorageSettings size_t min_upload_part_size = 16 * 1024 * 1024; size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; 
size_t max_part_number = 10000; + size_t max_single_part_copy_size = 256 * 1024 * 1024; bool use_native_copy = false; }; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 537a5a191e7..ff4cfe62feb 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -33,10 +33,6 @@ namespace ErrorCodes extern const int AZURE_BLOB_STORAGE_ERROR; } - -size_t max_single_operation_copy_size = 256 * 1024 * 1024; - - namespace { class UploadHelper @@ -304,7 +300,7 @@ void copyAzureBlobStorageFile( auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob); auto source_uri = block_blob_client_src.GetUrl(); - if (size < max_single_operation_copy_size) + if (size < settings.get()->max_single_part_copy_size) { block_blob_client_dest.CopyFromUri(source_uri); } From 99a1b269d71054a1d4d1e59a55b229469652435c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 29 Jan 2024 11:00:59 +0100 Subject: [PATCH 161/884] Removed unwanted setting --- src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 18b1a70defe..7d5c8f07a75 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -47,7 +47,6 @@ struct AzureObjectStorageSettings int list_object_keys_size = 1000; size_t min_upload_part_size = 16 * 1024 * 1024; size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t max_part_number = 10000; size_t max_single_part_copy_size = 256 * 1024 * 1024; bool use_native_copy = false; }; From ce0ebd964519d0961d92318e8a171d5338365213 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 29 Jan 2024 11:14:19 +0100 Subject: [PATCH 162/884] Removed unwanted log lines --- src/Backups/BackupImpl.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 0fb0d8cbda9..28a7d60b52c 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -535,7 +535,6 @@ void BackupImpl::checkBackupDoesntExist() const else file_name_to_check_existence = ".backup"; - LOG_INFO(&Poco::Logger::get("BackupImpl"), "checkBackupDoesntExist 1"); if (writer->fileExists(file_name_to_check_existence)) throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name_for_logging); @@ -567,8 +566,6 @@ bool BackupImpl::checkLockFile(bool throw_if_failed) const if (throw_if_failed) { - LOG_INFO(&Poco::Logger::get("BackupImpl"), "checkLockFile"); - if (!writer->fileExists(lock_file_name)) { throw Exception( From 1ab29bef622a8de3af7bec194598e3939c9f2d7a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 29 Jan 2024 15:33:09 +0000 Subject: [PATCH 163/884] fixes --- src/Functions/FunctionBinaryArithmetic.h | 54 ++++++++++++++----- src/Functions/IsOperation.h | 6 +-- .../00700_decimal_arithm.reference | 14 ++--- .../01717_int_div_float_too_large_ubsan.sql | 4 +- .../02975_intdiv_with_decimal.reference | 52 +++++++++++------- .../0_stateless/02975_intdiv_with_decimal.sql | 16 ++++++ 6 files changed, 101 insertions(+), 45 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index e34514d15fd..831c1cf3aeb 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ 
b/src/Functions/FunctionBinaryArithmetic.h @@ -153,17 +153,18 @@ public: using ResultDataType = Switch< /// Result must be Integer Case< - only_integer && IsDataTypeDecimal && IsDataTypeDecimal, + only_integer && (IsDataTypeDecimal || IsDataTypeDecimal), Switch< - Case || std::is_same_v, DataTypeInt256>, - Case || std::is_same_v, DataTypeInt128>, - Case || std::is_same_v, DataTypeInt64>, - Case || std::is_same_v, DataTypeInt32>>>, - Case< - only_integer, - Switch< - Case, LeftDataType>, - Case, RightDataType>>>, + Case< + IsDataTypeDecimal || IsDataTypeDecimal, + Switch< + Case, LeftDataType>, + Case, RightDataType>, + Case || std::is_same_v, DataTypeInt256>, + Case || std::is_same_v, DataTypeInt128>, + Case || std::is_same_v, DataTypeInt64>, + Case || std::is_same_v, DataTypeInt32>>>>>, + /// Decimal cases Case || IsDataTypeDecimal), InvalidType>, Case< @@ -1713,12 +1714,37 @@ public: type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } } - else if constexpr ((IsDataTypeDecimal && IsFloatingPoint) || - (IsDataTypeDecimal && IsFloatingPoint)) - type_res = std::make_shared(); + else if constexpr (((IsDataTypeDecimal && IsFloatingPoint) || + (IsDataTypeDecimal && IsFloatingPoint)) && !(is_div_int || is_div_int_or_zero)) + { + if constexpr ((is_div_int || is_div_int_or_zero) && IsDataTypeDecimal) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else if constexpr (is_div_int || is_div_int_or_zero) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + type_res = std::make_shared(); + } else if constexpr (IsDataTypeDecimal) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegralOrExtended) type_res = std::make_shared(); else if constexpr (is_div_int || is_div_int_or_zero) { diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index b36530591ef..b2c7a27d375 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -61,10 +61,8 @@ struct IsOperation static constexpr bool bit_hamming_distance = IsSameOperation::value; static constexpr bool division = div_floating || div_int || div_int_or_zero || modulo; - - static constexpr bool division_allow_decimal = div_floating || modulo; - - static constexpr bool allow_decimal = plus || minus || multiply || division_allow_decimal || least || greatest; + // NOTE: allow_decimal should not fully contain `division` because of divInt + static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest; }; } diff --git a/tests/queries/0_stateless/00700_decimal_arithm.reference b/tests/queries/0_stateless/00700_decimal_arithm.reference index 811946c87e0..20f04696b1b 100644 --- a/tests/queries/0_stateless/00700_decimal_arithm.reference +++ b/tests/queries/0_stateless/00700_decimal_arithm.reference @@ -10,18 +10,18 @@ 63 21 -42 882 -882 2 0 2 0 63 21 -42 882 -882 2 0 2 0 1.00305798474369219219752355409390731264 -0.16305798474369219219752355409390731264 1.490591730234615865843651857942052864 -1.38847100762815390390123822295304634368 
1.38847100762815390390123822295304634368 0.02 0.005 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.5 2.02 0.5 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.5 2 0 63 -21 42 882 -882 0 2 0 2 63 -21 42 882 -882 0 2 0 2 63 -21 42 882 -882 0 2 0 2 1.00305798474369219219752355409390731264 0.16305798474369219219752355409390731264 -1.490591730234615865843651857942052864 -1.38847100762815390390123822295304634368 1.38847100762815390390123822295304634368 -0.00000000000000000000000000000000000001 0.00000000000000000000000000000000000001 -63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0.495 1.98 +63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0 2 63.42 -21.42 41.58 890.82 -890.82 -63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0.495049504950495049 1.980198019801980198 -63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0.49 1.98 +63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0 2 +63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0 2 -42 42 42 42 0.42 0.42 0.42 42.42 42.42 42.42 0 0 0 0 0 0 0 0 0 0 42 -42 -42 -42 -0.42 -0.42 -0.42 -42.42 -42.42 -42.42 diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql index c4f26a079f0..dc1e5b37050 100644 --- a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql +++ b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql @@ -1,2 +1,2 @@ -SELECT intDiv(9223372036854775807, 0.9998999834060669); -- { serverError 153 } -SELECT intDiv(9223372036854775807, 1.); -- { serverError 153 } +SELECT intDiv(18446744073709551615, 0.9998999834060669); -- { serverError 153 } +SELECT intDiv(18446744073709551615, 1.); -- { serverError 153 } diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference index 9c1faab21d7..594dcee975a 100644 --- a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference @@ -24,28 +24,44 @@ 2 2 2 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 2 2 2 2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 +1 +1 +1 +1 2 2 2 diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql index 8fc4b5a9a7d..18e657caa8a 100644 --- a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql @@ -25,6 +25,14 @@ SELECT intDiv(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); SELECT intDiv(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); SELECT intDiv(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); SELECT intDiv(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(4.2, toDecimal32(2.2, 2)); +SELECT intDiv(4.2, toDecimal64(2.2, 2)); +SELECT intDiv(4.2, toDecimal128(2.2, 2)); +SELECT intDiv(4.2, toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), 2.2); +SELECT intDiv(toDecimal64(4.4, 2), 2.2); +SELECT intDiv(toDecimal128(4.4, 2), 2.2); +SELECT intDiv(toDecimal256(4.4, 2), 2.2); --intDivOrZero-- SELECT intDivOrZero(4,2); SELECT intDivOrZero(toDecimal32(4.4, 2), 2); @@ -52,3 +60,11 @@ SELECT 
intDivOrZero(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal32(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal64(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal128(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal64(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal128(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal256(4.4, 2), 2.2); From 6bfa910d9ea403e91fb9be04573c73bfae77b4c4 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 29 Jan 2024 16:47:02 +0100 Subject: [PATCH 164/884] Fix merge --- src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 78a67f3e59a..8556f0237e3 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -47,12 +47,10 @@ struct AzureObjectStorageSettings size_t max_single_read_retries = 3; size_t max_single_download_retries = 3; int list_object_keys_size = 1000; -<<<<<<< HEAD size_t min_upload_part_size = 16 * 1024 * 1024; size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; size_t max_single_part_copy_size = 256 * 1024 * 1024; bool use_native_copy = false; -======= size_t max_unexpected_write_error_retries = 4; >>>>>>> master }; From 4a8a7208f2a21236de1fa5140a2980a9bdf98974 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 29 Jan 2024 21:25:58 +0100 Subject: [PATCH 165/884] rename of settings, add setting for resultset, extend test, fix documentation and add to SettingsChanges log --- docs/en/interfaces/formats.md | 6 +-- .../operations/settings/settings-formats.md | 6 ++- docs/ru/interfaces/formats.md | 6 +-- src/Core/Settings.h | 3 +- src/Core/SettingsChangesHistory.h | 4 +- src/Formats/FormatFactory.cpp | 3 +- src/Formats/FormatSettings.h | 3 +- .../Impl/TemplateBlockOutputFormat.cpp | 39 ++++++++++++------- ...0937_format_schema_rows_template.reference | 5 +++ .../00937_format_schema_rows_template.sh | 24 ++++++++++-- 10 files changed, 71 insertions(+), 28 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a3f54c1c383..0f597282f9e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -253,7 +253,7 @@ This format is also available under the name `TSVRawWithNamesAndNames`. This format allows specifying a custom format string with placeholders for values with a specified escaping rule. -It uses settings `format_template_resultset`, `format_template_row` (`format_schema_rows_template`), `format_template_rows_between_delimiter` and some settings of other formats (e.g. `output_format_json_quote_64bit_integers` when using `JSON` escaping, see further) +It uses settings `format_template_resultset`, `format_template_row` (`format_template_row_format`), `format_template_rows_between_delimiter` and some settings of other formats (e.g. 
`output_format_json_quote_64bit_integers` when using `JSON` escaping, see further) Setting `format_template_row` specifies the path to the file containing format strings for rows with the following syntax: @@ -279,11 +279,11 @@ the values of `SearchPhrase`, `c` and `price` columns, which are escaped as `Quo `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` -In cases where it is challenging or not possible to deploy format output configuration for the template format to a directory on all nodes in a cluster, or if the format is trivial then `format_schema_rows_template` can be used to pass the template string directly in the query, rather than a path to the file which contains it. +In cases where it is challenging or not possible to deploy format output configuration for the template format to a directory on all nodes in a cluster, or if the format is trivial then `format_template_row_format` can be used to set the template string directly in the query, rather than a path to the file which contains it. The `format_template_rows_between_delimiter` setting specifies the delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) -Setting `format_template_resultset` specifies the path to the file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: +Setting `format_template_resultset` specifies the path to the file, which contains a format string for resultset. Setting `format_template_resultset_format` can be used to set the template string for the result set directly in the query itself. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: - `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string. - `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 5dedaa2f6ab..816812b1e3a 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1660,6 +1660,10 @@ Result: Path to file which contains format string for result set (for Template format). +### format_template_resultset_format {#format_template_resultset_format} + +Format string for result set (for Template format) + ### format_template_row {#format_template_row} Path to file which contains format string for rows (for Template format). @@ -1668,7 +1672,7 @@ Path to file which contains format string for rows (for Template format). Delimiter between rows (for Template format). 
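For illustration only (not part of this patch): a minimal query sketch, assuming a table `template(question String, answer String, likes UInt64, date Date)` like the one created in the test further below, showing the row template and row delimiter passed inline through the new settings instead of schema files:

```sql
SELECT question, likes
FROM template
ORDER BY date
FORMAT Template
SETTINGS
    format_template_row_format = 'Question: ${question:Quoted}, Likes: ${likes:Raw}',
    format_template_rows_between_delimiter = ';\n';
```

The same query could additionally set `format_template_resultset_format` to wrap the rows in a custom prefix and suffix, so no template files need to be deployed to the cluster nodes.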
-### format_schema_rows_template {#format_schema_rows_template} +### format_template_row_format {#format_template_row_format} Format string for rows (for Template format) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 8f8197e2221..a9280de9c7b 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -201,7 +201,7 @@ SELECT * FROM nestedt FORMAT TSV Этот формат позволяет указать произвольную форматную строку, в которую подставляются значения, сериализованные выбранным способом. -Для этого используются настройки `format_template_resultset`, `format_template_row` (`format_schema_rows_template`), `format_template_rows_between_delimiter` и настройки экранирования других форматов (например, `output_format_json_quote_64bit_integers` при экранировании как в `JSON`, см. далее) +Для этого используются настройки `format_template_resultset`, `format_template_row` (`format_template_row_format`), `format_template_rows_between_delimiter` и настройки экранирования других форматов (например, `output_format_json_quote_64bit_integers` при экранировании как в `JSON`, см. далее) Настройка `format_template_row` задаёт путь к файлу, содержащему форматную строку для строк таблицы, которая должна иметь вид: @@ -227,11 +227,11 @@ SELECT * FROM nestedt FORMAT TSV `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` -В тех случаях, когда не удобно или не возможно указать произвольную форматную строку в файле, можно использовать `format_schema_rows_template` указать произвольную форматную строку в запросе. +В тех случаях, когда не удобно или не возможно указать произвольную форматную строку в файле, можно использовать `format_template_row_format` указать произвольную форматную строку в запросе. Настройка `format_template_rows_between_delimiter` задаёт разделитель между строками, который выводится (или ожмдается при вводе) после каждой строки, кроме последней. По умолчанию `\n`. -Настройка `format_template_resultset` задаёт путь к файлу, содержащему форматную строку для результата. Форматная строка для результата имеет синтаксис аналогичный форматной строке для строк таблицы и позволяет указать префикс, суффикс и способ вывода дополнительной информации. Вместо имён столбцов в ней указываются следующие имена подстановок: +Настройка `format_template_resultset` задаёт путь к файлу, содержащему форматную строку для результата. Настройка `format_template_resultset_format` используется для установки форматной строки для результата непосредственно в запросе. Форматная строка для результата имеет синтаксис аналогичный форматной строке для строк таблицы и позволяет указать префикс, суффикс и способ вывода дополнительной информации. Вместо имён столбцов в ней указываются следующие имена подстановок: - `data` - строки с данными в формате `format_template_row`, разделённые `format_template_rows_between_delimiter`. Эта подстановка должна быть первой подстановкой в форматной строке. 
- `totals` - строка с тотальными значениями в формате `format_template_row` (при использовании WITH TOTALS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f9e3f401d98..bb946f0d861 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1080,7 +1080,8 @@ class IColumn; M(String, format_schema, "", "Schema identifier (used by schema-based formats)", 0) \ M(String, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \ M(String, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \ - M(String, format_schema_rows_template, "", "Format string for rows (for Template format)", 0) \ + M(String, format_template_row_format, "", "Format string for rows (for Template format)", 0) \ + M(String, format_template_resultset_format, "", "Format string for result set (for Template format)", 0) \ M(String, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \ \ M(EscapingRule, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 859ba99b5f7..8faf43c7e01 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -99,7 +99,9 @@ static std::map sett {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}}}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"format_template_row_format", "none", "", "Template row format string can be set directly in query"}, + {"format_template_resultset_format", "none", "", "Template result set format string can be set in query"}}}, {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 274994b4168..8c39b4b71e4 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -166,7 +166,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.template_settings.row_format = settings.format_template_row; - format_settings.template_settings.row_format_schema = settings.format_schema_rows_template; + format_settings.template_settings.row_format_template = settings.format_template_row_format; + format_settings.template_settings.resultset_format_template = settings.format_template_resultset_format; format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line; 
format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.tsv.enum_as_number = settings.input_format_tsv_enum_as_number; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 28a2076af84..bdd2dda5287 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -338,7 +338,8 @@ struct FormatSettings String resultset_format; String row_format; String row_between_delimiter; - String row_format_schema; + String row_format_template; + String resultset_format_template; } template_settings; struct diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index efda754917b..1c43a0fa331 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -194,13 +194,25 @@ void registerOutputFormatTemplate(FormatFactory & factory) const FormatSettings & settings) { ParsedTemplateFormatString resultset_format; + auto idx_resultset_by_name = [&](const String & partName) + { + return static_cast(TemplateBlockOutputFormat::stringToResultsetPart(partName)); + }; if (settings.template_settings.resultset_format.empty()) { /// Default format string: "${data}" - resultset_format.delimiters.resize(2); - resultset_format.escaping_rules.emplace_back(ParsedTemplateFormatString::EscapingRule::None); - resultset_format.format_idx_to_column_idx.emplace_back(0); - resultset_format.column_names.emplace_back("data"); + if (settings.template_settings.resultset_format_template.empty()) + { + resultset_format.delimiters.resize(2); + resultset_format.escaping_rules.emplace_back(ParsedTemplateFormatString::EscapingRule::None); + resultset_format.format_idx_to_column_idx.emplace_back(0); + resultset_format.column_names.emplace_back("data"); + } + else + { + resultset_format = ParsedTemplateFormatString(); + resultset_format.parse(settings.template_settings.resultset_format_template, idx_resultset_by_name); + } } else { @@ -208,31 +220,32 @@ void registerOutputFormatTemplate(FormatFactory & factory) resultset_format = ParsedTemplateFormatString( FormatSchemaInfo(settings.template_settings.resultset_format, "Template", false, settings.schema.is_server, settings.schema.format_schema_path), - [&](const String & partName) - { - return static_cast(TemplateBlockOutputFormat::stringToResultsetPart(partName)); - }); + idx_resultset_by_name); + if (!settings.template_settings.resultset_format_template.empty()) + { + throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_resultset or format_template_resultset_format, but not both"); + } } ParsedTemplateFormatString row_format; - auto idx_by_name = [&](const String & colName) + auto idx_row_by_name = [&](const String & colName) { return sample.getPositionByName(colName); }; if (settings.template_settings.row_format.empty()) { row_format = ParsedTemplateFormatString(); - row_format.parse(settings.template_settings.row_format_schema,idx_by_name); + row_format.parse(settings.template_settings.row_format_template, idx_row_by_name); } else { row_format = ParsedTemplateFormatString( FormatSchemaInfo(settings.template_settings.row_format, "Template", false, settings.schema.is_server, settings.schema.format_schema_path), - idx_by_name); - if (!settings.template_settings.row_format_schema.empty()) + idx_row_by_name); + if (!settings.template_settings.row_format_template.empty()) { - throw 
Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_schema_rows_template, but not both"); + throw Exception(DB::ErrorCodes::INVALID_TEMPLATE_FORMAT, "Expected either format_template_row or format_template_row_format, but not both"); } } return std::make_shared(sample, buf, settings, resultset_format, row_format, settings.template_settings.row_between_delimiter); diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.reference b/tests/queries/0_stateless/00937_format_schema_rows_template.reference index 5f59cca2629..85bab456512 100644 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.reference +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.reference @@ -2,3 +2,8 @@ Question: 'How awesome is clickhouse?', Answer: 'unbelievably awesome!', Number Question: 'How fast is clickhouse?', Answer: 'Lightning fast!', Number of Likes: 9876543210, Date: 2016-01-03; Question: 'Is it opensource?', Answer: 'of course it is!', Number of Likes: 789, Date: 2016-01-04 +===== Results ===== +Question: 'How awesome is clickhouse?', Answer: 'unbelievably awesome!', Number of Likes: 456, Date: 2016-01-02; +Question: 'How fast is clickhouse?', Answer: 'Lightning fast!', Number of Likes: 9876543210, Date: 2016-01-03; +Question: 'Is it opensource?', Answer: 'of course it is!', Number of Likes: 789, Date: 2016-01-04 +=================== diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.sh b/tests/queries/0_stateless/00937_format_schema_rows_template.sh index aff5de3b555..d773fedfd3d 100755 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.sh +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -# Test format_schema_rows_template setting +# Test format_template_row_format setting $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template"; $CLICKHOUSE_CLIENT --query="CREATE TABLE template (question String, answer String, likes UInt64, date Date) ENGINE = Memory"; @@ -15,17 +15,33 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO template VALUES ('Is it opensource?', 'of course it is!', 789, '2016-01-04')"; $CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ -format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ +format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ format_template_rows_between_delimiter = ';\n'"; echo -e "\n" -# Test that if both format_schema_rows_template setting and format_template_row are provided, error is thrown +# Test that if both format_template_row_format setting and format_template_row are provided, error is thrown echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Likes: ${likes:Raw}, Date: ${date:Raw}' > "$CURDIR"/00937_template_output_format_row.tmp $CLICKHOUSE_CLIENT --multiline --multiquery --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ -format_schema_rows_template = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ +format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ +format_template_rows_between_delimiter = ';\n'; --{clientError 474}" + +# Test format_template_resultset_format setting + +$CLICKHOUSE_CLIENT --query="SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ +format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ +format_template_resultset_format = '===== Results ===== \n\${data}\n===================\n', \ +format_template_rows_between_delimiter = ';\n'"; + +# Test that if both format_template_result_format setting and format_template_resultset are provided, error is thrown +echo -ne '===== Resultset ===== \n \${data} \n ===============' > "$CURDIR"/00937_template_output_format_resultset.tmp +$CLICKHOUSE_CLIENT --multiline --multiquery --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ +format_template_resultset = '$CURDIR/00937_template_output_format_resultset.tmp', \ +format_template_resultset_format = '===== Resultset ===== \n \${data} \n ===============', \ +format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ format_template_rows_between_delimiter = ';\n'; --{clientError 474}" $CLICKHOUSE_CLIENT --query="DROP TABLE template"; rm "$CURDIR"/00937_template_output_format_row.tmp +rm "$CURDIR"/00937_template_output_format_resultset.tmp From 8183074500b4d0d0755b48d01ff85215f16c3dfd Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Mon, 29 Jan 
2024 21:32:42 +0100 Subject: [PATCH 166/884] Update src/Core/SettingsChangesHistory.h set previous value as empty string for added setting rather than "none" in SettingsChangesHistory.h Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Core/SettingsChangesHistory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 960b64e5b6a..4a6a5d15be5 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -102,8 +102,8 @@ static std::map sett {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"format_template_row_format", "none", "", "Template row format string can be set directly in query"}, - {"format_template_resultset_format", "none", "", "Template result set format string can be set in query"}, + {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, + {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}}}, {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, From 3f1ec9a9881949c7e676cf11f35fb75df3b95f78 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jan 2024 04:23:16 +0100 Subject: [PATCH 167/884] Fix error --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 692d8fc6360..53b14ddc385 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -102,7 +102,7 @@ static std::map sett {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"default_table_engine", DefaultTableEngine::None, DefaultTableEngine::MergeTree, "Set default table engine to MergeTree for better usability"}, + {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}}}, From 0ded5800112f95f7b13ca8d060e743559ce787e6 Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 30 Jan 2024 04:03:27 +0000 
Subject: [PATCH 168/884] Fix --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 14 +++++++++++++- .../replaceForPositionalArguments.cpp | 19 +++++++++++++++---- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index c683214840b..fbabef87112 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2170,7 +2170,19 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ else // Int64 { auto value = constant_node->getValue().get(); - pos = value > 0 ? value : projection_nodes.size() + value + 1; + if (value > 0) + pos = value; + else + { + if (static_cast(std::abs(value)) > projection_nodes.size()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Negtive positional argument number {} is out of bounds. Expected in range [-{}, -1]. In scope {}", + value, + projection_nodes.size(), + scope.scope_node->formatASTForErrorMessage()); + pos = projection_nodes.size() + value + 1; + } } if (!pos || pos > projection_nodes.size()) diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index bea87ad913a..c72cac25c9d 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -10,7 +10,8 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int BAD_ARGUMENTS; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; } bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_query, ASTSelectQuery::Expression expression) @@ -39,7 +40,18 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel else if (which == Field::Types::Int64) { auto value = ast_literal->value.get(); - pos = value > 0 ? value : columns.size() + value + 1; + if (value > 0) + pos = value; + else + { + if (static_cast(std::abs(value)) > columns.size()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Negtive positional argument number {} is out of bounds. 
Expected in range [-{}, -1]", + value, + columns.size()); + pos = columns.size() + value + 1; + } } else { @@ -47,8 +59,7 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel } if (!pos || pos > columns.size()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Positional argument out of bounds: {} (expected in range [1, {}]", pos, columns.size()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional argument out of bounds: {} (expected in range [1, {}]", pos, columns.size()); const auto & column = columns[--pos]; if (typeid_cast(column.get()) || typeid_cast(column.get())) From 4f12ca249d4d728c403f52c1d68edda68a9af286 Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 30 Jan 2024 07:01:07 +0000 Subject: [PATCH 169/884] Fix typo --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- src/Interpreters/replaceForPositionalArguments.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 54767c88993..d9434c878d2 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2180,7 +2180,7 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ if (static_cast(std::abs(value)) > projection_nodes.size()) throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Negtive positional argument number {} is out of bounds. Expected in range [-{}, -1]. In scope {}", + "Negative positional argument number {} is out of bounds. Expected in range [-{}, -1]. In scope {}", value, projection_nodes.size(), scope.scope_node->formatASTForErrorMessage()); diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index c72cac25c9d..cceb0650fcd 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -47,7 +47,7 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel if (static_cast(std::abs(value)) > columns.size()) throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Negtive positional argument number {} is out of bounds. Expected in range [-{}, -1]", + "Negative positional argument number {} is out of bounds. 
Expected in range [-{}, -1]", value, columns.size()); pos = columns.size() + value + 1; From bab6e6fe3402f8408acba177a6d4318c9b90ea1b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:37:32 +0100 Subject: [PATCH 170/884] Fix tests --- tests/integration/test_storage_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 75ef50ec12a..e1d636f3831 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -967,7 +967,7 @@ def test_union_schema_inference_mode(cluster): f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union', describe_compact_output=1 format TSV", expect_error="true", ) - assert "Cannot extract table structure" in error + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in error def test_schema_inference_cache(cluster): From 416910db00b2a7fe1cc32aeb7396b494ce2cc2f3 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 30 Jan 2024 13:03:43 +0000 Subject: [PATCH 171/884] Remove unnecessary outer loop --- src/Common/PoolWithFailoverBase.h | 69 ++++++++++++++----------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 6da4445950c..ef4bb40535f 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -249,51 +249,44 @@ PoolWithFailoverBase::getMany( }); std::string fail_messages; - bool finished = false; - while (!finished) + for (size_t i = 0; i < shuffled_pools.size(); ++i) { - for (size_t i = 0; i < shuffled_pools.size(); ++i) + if (up_to_date_count >= max_entries /// Already enough good entries. + || entries_count + failed_pools_count >= nested_pools.size()) /// No more good entries will be produced. + break; + + ShuffledPool & shuffled_pool = shuffled_pools[i]; + TryResult & result = try_results[i]; + if (max_tries && (shuffled_pool.error_count >= max_tries || !result.entry.isNull())) + continue; + + std::string fail_message; + result = try_get_entry(*shuffled_pool.pool, fail_message); + + if (!fail_message.empty()) + fail_messages += fail_message + '\n'; + + if (!result.entry.isNull()) { - if (up_to_date_count >= max_entries /// Already enough good entries. - || entries_count + failed_pools_count >= nested_pools.size()) /// No more good entries will be produced. 
+ ++entries_count; + if (result.is_usable) { - finished = true; - break; + ++usable_count; + if (result.is_up_to_date) + ++up_to_date_count; } + } + else + { + LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); - ShuffledPool & shuffled_pool = shuffled_pools[i]; - TryResult & result = try_results[i]; - if (max_tries && (shuffled_pool.error_count >= max_tries || !result.entry.isNull())) - continue; + shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1); - std::string fail_message; - result = try_get_entry(*shuffled_pool.pool, fail_message); - - if (!fail_message.empty()) - fail_messages += fail_message + '\n'; - - if (!result.entry.isNull()) + if (shuffled_pool.error_count >= max_tries) { - ++entries_count; - if (result.is_usable) - { - ++usable_count; - if (result.is_up_to_date) - ++up_to_date_count; - } - } - else - { - LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); - - shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1); - - if (shuffled_pool.error_count >= max_tries) - { - ++failed_pools_count; - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll); - } + ++failed_pools_count; + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll); } } } From 6624e34580caaac255c39d5edcd1b136007839c9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 30 Jan 2024 13:06:47 +0000 Subject: [PATCH 172/884] RemoteQueryExecutor constructors formatting --- src/QueryPipeline/RemoteQueryExecutor.cpp | 36 ++++++++++++++++------- src/QueryPipeline/RemoteQueryExecutor.h | 33 +++++++++++++++------ 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 136a3bb09c6..1caedfc8511 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -64,9 +64,14 @@ RemoteQueryExecutor::RemoteQueryExecutor( RemoteQueryExecutor::RemoteQueryExecutor( Connection & connection, - const String & query_, const Block & header_, ContextPtr context_, - ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_) + const String & query_, + const Block & header_, + ContextPtr context_, + ThrottlerPtr throttler, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_) : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) { create_connections = [this, &connection, throttler, extension_](AsyncCallback) @@ -80,9 +85,14 @@ RemoteQueryExecutor::RemoteQueryExecutor( RemoteQueryExecutor::RemoteQueryExecutor( std::shared_ptr connection_ptr, - const String & query_, const Block & header_, ContextPtr context_, - ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_) + const String & query_, + const Block & header_, + ContextPtr context_, + ThrottlerPtr throttler, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_) : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, 
stage_, extension_) { create_connections = [this, connection_ptr, throttler, extension_](AsyncCallback) @@ -96,12 +106,18 @@ RemoteQueryExecutor::RemoteQueryExecutor( RemoteQueryExecutor::RemoteQueryExecutor( std::vector && connections_, - const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_) + const String & query_, + const Block & header_, + ContextPtr context_, + const ThrottlerPtr & throttler, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_) : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) { - create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable { + create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable + { auto res = std::make_unique(std::move(connections_), context->getSettingsRef(), throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 444f1258f3e..e874b4be726 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -56,23 +56,38 @@ public: /// Takes already set connection. RemoteQueryExecutor( Connection & connection, - const String & query_, const Block & header_, ContextPtr context_, - ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); + const String & query_, + const Block & header_, + ContextPtr context_, + ThrottlerPtr throttler_ = nullptr, + const Scalars & scalars_ = Scalars(), + const Tables & external_tables_ = Tables(), + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, + std::optional extension_ = std::nullopt); /// Takes already set connection. RemoteQueryExecutor( std::shared_ptr connection, - const String & query_, const Block & header_, ContextPtr context_, - ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); + const String & query_, + const Block & header_, + ContextPtr context_, + ThrottlerPtr throttler_ = nullptr, + const Scalars & scalars_ = Scalars(), + const Tables & external_tables_ = Tables(), + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, + std::optional extension_ = std::nullopt); /// Accepts several connections already taken from pool. 
RemoteQueryExecutor( std::vector && connections_, - const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); + const String & query_, + const Block & header_, + ContextPtr context_, + const ThrottlerPtr & throttler = nullptr, + const Scalars & scalars_ = Scalars(), + const Tables & external_tables_ = Tables(), + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, + std::optional extension_ = std::nullopt); /// Takes a pool and gets one or several connections from it. RemoteQueryExecutor( From c891ed03c1ae5d85acc1b5f124433d634d23b278 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 30 Jan 2024 14:19:10 +0100 Subject: [PATCH 173/884] update test to use CLICKHOUSE_TEST_UNIQUE_NAME so parallel tests don't fail --- .../00937_format_schema_rows_template.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/00937_format_schema_rows_template.sh b/tests/queries/0_stateless/00937_format_schema_rows_template.sh index d773fedfd3d..0221527f9c9 100755 --- a/tests/queries/0_stateless/00937_format_schema_rows_template.sh +++ b/tests/queries/0_stateless/00937_format_schema_rows_template.sh @@ -20,10 +20,11 @@ format_template_rows_between_delimiter = ';\n'"; echo -e "\n" -# Test that if both format_template_row_format setting and format_template_row are provided, error is thrown -echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Likes: ${likes:Raw}, Date: ${date:Raw}' > "$CURDIR"/00937_template_output_format_row.tmp +# Test that if both format_template_row_format setting and format_template_row are provided, error is thrown +row_format_file="$CURDIR"/"${CLICKHOUSE_TEST_UNIQUE_NAME}"_template_output_format_row.tmp +echo -ne 'Question: ${question:Quoted}, Answer: ${answer:Quoted}, Number of Likes: ${likes:Raw}, Date: ${date:Raw}' > $row_format_file $CLICKHOUSE_CLIENT --multiline --multiquery --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ -format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ +format_template_row = '$row_format_file', \ format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ format_template_rows_between_delimiter = ';\n'; --{clientError 474}" @@ -35,13 +36,14 @@ format_template_resultset_format = '===== Results ===== \n\${data}\n============ format_template_rows_between_delimiter = ';\n'"; # Test that if both format_template_result_format setting and format_template_resultset are provided, error is thrown -echo -ne '===== Resultset ===== \n \${data} \n ===============' > "$CURDIR"/00937_template_output_format_resultset.tmp +resultset_output_file="$CURDIR"/"$CLICKHOUSE_TEST_UNIQUE_NAME"_template_output_format_resultset.tmp +echo -ne '===== Resultset ===== \n \${data} \n ===============' > $resultset_output_file $CLICKHOUSE_CLIENT --multiline --multiquery --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ -format_template_resultset = '$CURDIR/00937_template_output_format_resultset.tmp', \ +format_template_resultset = '$resultset_output_file', \ format_template_resultset_format = '===== 
Resultset ===== \n \${data} \n ===============', \ format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ format_template_rows_between_delimiter = ';\n'; --{clientError 474}" $CLICKHOUSE_CLIENT --query="DROP TABLE template"; -rm "$CURDIR"/00937_template_output_format_row.tmp -rm "$CURDIR"/00937_template_output_format_resultset.tmp +rm $row_format_file +rm $resultset_output_file From 7a1458c9227f47de485a06e6e473d059da381631 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Jan 2024 15:21:58 +0000 Subject: [PATCH 174/884] Fix validating suspicious/experimental types in nested types --- .../parseColumnsListForTableFunction.cpp | 29 +++++++++++++++++-- .../02981_nested_bad_types.reference | 0 .../0_stateless/02981_nested_bad_types.sql | 27 +++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02981_nested_bad_types.reference create mode 100644 tests/queries/0_stateless/02981_nested_bad_types.sql diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 551a883d093..fcdad7c93c1 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -48,8 +51,7 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings if (!settings.allow_suspicious_fixed_string_types) { - auto basic_type = removeLowCardinalityAndNullable(type); - if (const auto * fixed_string = typeid_cast(basic_type.get())) + if (const auto * fixed_string = typeid_cast(type.get())) { if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) throw Exception( @@ -71,6 +73,29 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings "Set setting allow_experimental_variant_type = 1 in order to allow it", type->getName()); } } + + if (const auto * nullable_type = typeid_cast(type.get())) + { + validateDataType(nullable_type->getNestedType(), settings); + } + else if (const auto * lc_type = typeid_cast(type.get())) + { + validateDataType(lc_type->getDictionaryType(), settings); + } + else if (const auto * array_type = typeid_cast(type.get())) + { + validateDataType(array_type->getNestedType(), settings); + } + else if (const auto * tuple_type = typeid_cast(type.get())) + { + for (const auto & element : tuple_type->getElements()) + validateDataType(element, settings); + } + else if (const auto * map_type = typeid_cast(type.get())) + { + validateDataType(map_type->getKeyType(), settings); + validateDataType(map_type->getValueType(), settings); + } } ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context) diff --git a/tests/queries/0_stateless/02981_nested_bad_types.reference b/tests/queries/0_stateless/02981_nested_bad_types.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02981_nested_bad_types.sql b/tests/queries/0_stateless/02981_nested_bad_types.sql new file mode 100644 index 00000000000..663d39cb1e2 --- /dev/null +++ b/tests/queries/0_stateless/02981_nested_bad_types.sql @@ -0,0 +1,27 @@ +set allow_suspicious_low_cardinality_types=0; +set allow_suspicious_fixed_string_types=0; +set allow_experimental_variant_type=0; + +select [42]::Array(LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} 
+select [[[42]]]::Array(Array(Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select map('a', 42)::Map(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select map('a', map('b', [42]))::Map(String, Map(String, Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', 42)::Tuple(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + +select [42]::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select [42]::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select [[[42]]]::Array(Array(Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select map('a', 42)::Map(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select map('a', map('b', [42]))::Map(String, Map(String, Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select tuple('a', 42)::Tuple(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} + +select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select [[[42]]]::Array(Array(Array(Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} +select map('a', 42)::Map(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select map('a', map('b', [42]))::Map(String, Map(String, Array(Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} +select tuple('a', 42)::Tuple(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} + From 0557cdb8a9def2e4c8df81d23cb526153ce023f8 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 30 Jan 2024 15:31:04 +0000 Subject: [PATCH 175/884] fix due to review --- src/Functions/FunctionBinaryArithmetic.h | 40 ++++++++++-------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 831c1cf3aeb..62a50f5e0c2 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -156,14 +156,18 @@ public: only_integer && (IsDataTypeDecimal || IsDataTypeDecimal), Switch< Case< - IsDataTypeDecimal || IsDataTypeDecimal, + IsDataTypeDecimal, + Switch< + Case, DataTypeInt256>, + Case, DataTypeInt128>, + Case, DataTypeInt64>, + Case, DataTypeInt32>>>, + Case< + IsDataTypeDecimal, Switch< Case, LeftDataType>, - Case, RightDataType>, - Case || std::is_same_v, DataTypeInt256>, - Case || std::is_same_v, DataTypeInt128>, - Case || std::is_same_v, DataTypeInt64>, - Case || std::is_same_v, DataTypeInt32>>>>>, + Case, DataTypeInt64>, + Case, DataTypeInt32>>>>>, /// Decimal cases Case || IsDataTypeDecimal), InvalidType>, @@ -1684,11 +1688,11 @@ public: { if constexpr (is_div_int || is_div_int_or_zero) { - if constexpr (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v) type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + else if constexpr (std::is_same_v) type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + else if 
constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); @@ -1723,18 +1727,14 @@ public: type_res = std::make_shared(); else if constexpr (std::is_same_v) type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + else if constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); } else if constexpr (is_div_int || is_div_int_or_zero) { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); @@ -1744,9 +1744,7 @@ public: } else if constexpr (IsDataTypeDecimal) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegralOrExtended) - type_res = std::make_shared(); - else if constexpr (is_div_int || is_div_int_or_zero) + if constexpr (is_div_int || is_div_int_or_zero) { if constexpr (std::is_same_v) type_res = std::make_shared(); @@ -1766,11 +1764,7 @@ public: type_res = std::make_shared(); else if constexpr (is_div_int || is_div_int_or_zero) { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) + if constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); From 998c56fc3d3602a1151c7e310863e12666e595e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 30 Jan 2024 17:36:34 +0100 Subject: [PATCH 176/884] Move code --- src/Compression/CompressionCodecT64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index 42c6a18aa77..3ddc56fe4f6 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -168,6 +168,7 @@ TypeIndex baseType(TypeIndex type_idx) return TypeIndex::Int16; case TypeIndex::Int32: case TypeIndex::Decimal32: + case TypeIndex::Date32: return TypeIndex::Int32; case TypeIndex::Int64: case TypeIndex::Decimal64: @@ -180,8 +181,6 @@ TypeIndex baseType(TypeIndex type_idx) case TypeIndex::Enum16: case TypeIndex::Date: return TypeIndex::UInt16; - case TypeIndex::Date32: - return TypeIndex::Int32; case TypeIndex::UInt32: case TypeIndex::DateTime: case TypeIndex::IPv4: From e2a66f8e6594fcb8c95f47a6f2670869c78a4a35 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Jan 2024 16:39:52 +0000 Subject: [PATCH 177/884] Fix tests --- tests/queries/0_stateless/02010_array_index_bad_cast.sql | 1 + .../0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 19c58bb28a7..42a6556fc77 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,2 +1,3 @@ -- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). 
+SET allow_suspicious_low_cardinality_types=1; SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError 44 } diff --git a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 index 79a7c654f10..95bac76c591 100644 --- a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 +++ b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 @@ -1,4 +1,4 @@ - +SET allow_suspicious_low_cardinality_types=1; DROP TABLE IF EXISTS test1__fuzz_36; DROP TABLE IF EXISTS test1__fuzz_38; From 0576aa2b7fd060c68f482f8205575bd904356ebe Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 30 Jan 2024 16:45:36 +0000 Subject: [PATCH 178/884] fix fuzzer --- src/Functions/FunctionBinaryArithmetic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 62a50f5e0c2..e31183573c3 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -1719,7 +1719,7 @@ public: } } else if constexpr (((IsDataTypeDecimal && IsFloatingPoint) || - (IsDataTypeDecimal && IsFloatingPoint)) && !(is_div_int || is_div_int_or_zero)) + (IsDataTypeDecimal && IsFloatingPoint))) { if constexpr ((is_div_int || is_div_int_or_zero) && IsDataTypeDecimal) { @@ -1760,7 +1760,7 @@ public: } else if constexpr (IsDataTypeDecimal) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegralOrExtended) type_res = std::make_shared(); else if constexpr (is_div_int || is_div_int_or_zero) { From 023b8cbd53c1d3788e97d17b0329e3330c1cc0eb Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 30 Jan 2024 17:47:11 +0100 Subject: [PATCH 179/884] Retry disconnects and expired sessions --- .../System/StorageSystemZooKeeper.cpp | 39 +++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 37fe9074950..9a671f08138 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -424,9 +424,35 @@ void ReadFromSystemZooKeeper::applyFilters() paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } +/// Executes a request to Keeper and retries it in case of expired sessions and disconnects +template +static Result runWithReconnects(Operation && operation, ContextPtr context, QueryStatusPtr query_status) +{ + constexpr int max_retries = 20; /// Limit retries by some reasonable number to avoid infinite loops + for (int attempt = 0; ; ++attempt) + { + if (query_status) + query_status->checkTimeLimit(); + + zkutil::ZooKeeperPtr keeper = context->getZooKeeper(); + + try + { + return operation(keeper); + } + catch (const Coordination::Exception & e) + { + if (!Coordination::isHardwareError(e.code) || + attempt >= max_retries || + e.code == Coordination::Error::ZOPERATIONTIMEOUT) + throw; + } + } +} + void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) { - zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); + QueryStatusPtr query_status = context->getProcessListElement(); if (paths.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -448,6 +474,9 @@ void 
ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) std::unordered_set added; while (!paths.empty()) { + if (query_status) + query_status->checkTimeLimit(); + list_tasks.clear(); std::vector paths_to_list; while (!paths.empty() && static_cast(list_tasks.size()) < max_inflight_requests) @@ -470,7 +499,9 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) paths_to_list.emplace_back(task.path_corrected); list_tasks.emplace_back(std::move(task)); } - auto list_responses = zookeeper->tryGetChildren(paths_to_list); + auto list_responses = runWithReconnects( + [&paths_to_list](zkutil::ZooKeeperPtr zookeeper) { return zookeeper->tryGetChildren(paths_to_list); }, + context, query_status); struct GetTask { @@ -514,7 +545,9 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) } } - auto get_responses = zookeeper->tryGet(paths_to_get); + auto get_responses = runWithReconnects( + [&paths_to_get](zkutil::ZooKeeperPtr zookeeper) { return zookeeper->tryGet(paths_to_get); }, + context, query_status); for (size_t i = 0, size = get_tasks.size(); i < size; ++i) { From 82c06ca2949601989699afac6a6bddd05ef2d4f6 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 30 Jan 2024 18:31:37 +0100 Subject: [PATCH 180/884] Use ZooKeeperRetriesControl --- .../System/StorageSystemZooKeeper.cpp | 40 ++++--------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 9a671f08138..61919f53b24 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -424,32 +425,6 @@ void ReadFromSystemZooKeeper::applyFilters() paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } -/// Executes a request to Keeper and retries it in case of expired sessions and disconnects -template -static Result runWithReconnects(Operation && operation, ContextPtr context, QueryStatusPtr query_status) -{ - constexpr int max_retries = 20; /// Limit retries by some reasonable number to avoid infinite loops - for (int attempt = 0; ; ++attempt) - { - if (query_status) - query_status->checkTimeLimit(); - - zkutil::ZooKeeperPtr keeper = context->getZooKeeper(); - - try - { - return operation(keeper); - } - catch (const Coordination::Exception & e) - { - if (!Coordination::isHardwareError(e.code) || - attempt >= max_retries || - e.code == Coordination::Error::ZOPERATIONTIMEOUT) - throw; - } - } -} - void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) { QueryStatusPtr query_status = context->getProcessListElement(); @@ -499,9 +474,10 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) paths_to_list.emplace_back(task.path_corrected); list_tasks.emplace_back(std::move(task)); } - auto list_responses = runWithReconnects( - [&paths_to_list](zkutil::ZooKeeperPtr zookeeper) { return zookeeper->tryGetChildren(paths_to_list); }, - context, query_status); + + zkutil::ZooKeeper::MultiTryGetChildrenResponse list_responses; + ZooKeeperRetriesControl("", nullptr, ZooKeeperRetriesInfo(20, 1, 1000), query_status).retryLoop( + [&]() { list_responses = context->getZooKeeper()->tryGetChildren(paths_to_list); }); struct GetTask { @@ -545,9 +521,9 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) } } - auto get_responses = runWithReconnects( - 
[&paths_to_get](zkutil::ZooKeeperPtr zookeeper) { return zookeeper->tryGet(paths_to_get); }, - context, query_status); + zkutil::ZooKeeper::MultiTryGetResponse get_responses; + ZooKeeperRetriesControl("", nullptr, ZooKeeperRetriesInfo(20, 1, 1000), query_status).retryLoop( + [&]() { get_responses = context->getZooKeeper()->tryGet(paths_to_get); }); for (size_t i = 0, size = get_tasks.size(); i < size; ++i) { From c348c4e828e6d2c4978d03936e82e825a5966a59 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 30 Jan 2024 19:02:17 +0100 Subject: [PATCH 181/884] Move ZooKeeperRetries.h to Common --- src/Backups/BackupEntriesCollector.h | 2 +- src/Backups/WithRetries.h | 2 +- src/{Storages/MergeTree => Common/ZooKeeper}/ZooKeeperRetries.h | 0 src/Interpreters/executeDDLQueryOnCluster.h | 2 +- src/Storages/MergeTree/ReplicatedMergeTreeSink.h | 2 +- src/Storages/System/StorageSystemZooKeeper.cpp | 2 +- 6 files changed, 5 insertions(+), 5 deletions(-) rename src/{Storages/MergeTree => Common/ZooKeeper}/ZooKeeperRetries.h (100%) diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index bad67e494c4..01e8d594334 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Backups/WithRetries.h b/src/Backups/WithRetries.h index 3a6e28996b9..f795a963911 100644 --- a/src/Backups/WithRetries.h +++ b/src/Backups/WithRetries.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Common/ZooKeeper/ZooKeeperRetries.h similarity index 100% rename from src/Storages/MergeTree/ZooKeeperRetries.h rename to src/Common/ZooKeeper/ZooKeeperRetries.h diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index 7daf9babf9f..d3365553875 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace zkutil diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index bc23204e7d3..29f3183be64 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 61919f53b24..6aa85e6a9e9 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -13,6 +12,7 @@ #include #include #include +#include #include #include #include From 299c390d2b17e118a0fc87a21bc8859d135e006b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 30 Jan 2024 15:56:41 +0100 Subject: [PATCH 182/884] Add some fuzzing to ASTLiterals --- src/Client/QueryFuzzer.cpp | 57 ++++++++++++++++++++++++++++++++++---- src/Client/QueryFuzzer.h | 2 ++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 629d36e7960..786d5af0cb3 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -903,6 +903,54 @@ void QueryFuzzer::notifyQueryFailed(ASTPtr ast) remove_fuzzed_table(insert->getTable()); } +ASTPtr 
QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) +{ + auto * l = child->as(); + chassert(l); + auto type = l->value.getType(); + if (type == Field::Types::Which::String && fuzz_rand() % 7 == 0) + { + String value = l->value.get(); + child = makeASTFunction( + "toFixedString", std::make_shared(value), std::make_shared(static_cast(value.size()))); + } + + if (fuzz_rand() % 11 == 0) + { + String value = l->value.get(); + child = makeASTFunction("toNullable", child); + } + + if (fuzz_rand() % 11 == 0) + { + String value = l->value.get(); + child = makeASTFunction("toLowCardinality", child); + } + + if (fuzz_rand() % 11 == 0) + { + String value = l->value.get(); + child = makeASTFunction("materialize", child); + } + + return child; +} + + +void QueryFuzzer::fuzzExpressionList(ASTExpressionList & expr_list) +{ + for (size_t i = 0; i < expr_list.children.size(); i++) + { + if (auto * literal = typeid_cast(expr_list.children[i].get())) + { + if (fuzz_rand() % 13 == 0) + expr_list.children[i] = fuzzLiteralUnderExpressionList(expr_list.children[i]); + } + else + fuzz(expr_list.children[i]); + } +} + void QueryFuzzer::fuzz(ASTs & asts) { for (auto & ast : asts) @@ -989,7 +1037,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast) } else if (auto * expr_list = typeid_cast(ast.get())) { - fuzz(expr_list->children); + fuzzExpressionList(*expr_list); } else if (auto * order_by_element = typeid_cast(ast.get())) { @@ -1108,7 +1156,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast) } /* * The time to fuzz the settings has not yet come. - * Apparently we don't have any infractructure to validate the values of + * Apparently we don't have any infrastructure to validate the values of * the settings, and the first query with max_block_size = -1 breaks * because of overflows here and there. *//* @@ -1131,9 +1179,8 @@ void QueryFuzzer::fuzz(ASTPtr & ast) // are ASTPtr -- this is redundant ownership, but hides the error if the // child field is replaced. Others can be ASTLiteral * or the like, which // leads to segfault if the pointed-to AST is replaced. - // Replacing children is safe in case of ASTExpressionList. In a more - // general case, we can change the value of ASTLiteral, which is what we - // do here. + // Replacing children is safe in case of ASTExpressionList (done in fuzzExpressionList). 
In a more + // general case, we can change the value of ASTLiteral, which is what we do here if (fuzz_rand() % 11 == 0) { literal->value = fuzzField(literal->value);
diff --git a/src/Client/QueryFuzzer.h b/src/Client/QueryFuzzer.h
index 18c7b8a9241..cdeba2b76fd 100644
--- a/src/Client/QueryFuzzer.h
+++ b/src/Client/QueryFuzzer.h
@@ -95,6 +95,8 @@ struct QueryFuzzer void fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind); void fuzzColumnDeclaration(ASTColumnDeclaration & column); void fuzzTableName(ASTTableExpression & table); + ASTPtr fuzzLiteralUnderExpressionList(ASTPtr child); + void fuzzExpressionList(ASTExpressionList & expr_list); void fuzz(ASTs & asts); void fuzz(ASTPtr & ast); void collectFuzzInfoMain(ASTPtr ast);
From 4b5e992565b060cc002495f8c58cceb79c75d53a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Tue, 30 Jan 2024 19:33:31 +0100
Subject: [PATCH 183/884] Fix problems
--- src/Client/QueryFuzzer.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-)
diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp
index 786d5af0cb3..bb551fcb11e 100644
--- a/src/Client/QueryFuzzer.cpp
+++ b/src/Client/QueryFuzzer.cpp
@@ -916,22 +916,13 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) } if (fuzz_rand() % 11 == 0) - { - String value = l->value.get(); child = makeASTFunction("toNullable", child); - } if (fuzz_rand() % 11 == 0) - { - String value = l->value.get(); child = makeASTFunction("toLowCardinality", child); - } if (fuzz_rand() % 11 == 0) - { - String value = l->value.get(); child = makeASTFunction("materialize", child); - } return child; }
@@ -939,15 +930,15 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) void QueryFuzzer::fuzzExpressionList(ASTExpressionList & expr_list) { - for (size_t i = 0; i < expr_list.children.size(); i++) + for (auto & child : expr_list.children) { - if (auto * literal = typeid_cast(expr_list.children[i].get())) + if (auto * literal = typeid_cast(child.get())) { if (fuzz_rand() % 13 == 0) - expr_list.children[i] = fuzzLiteralUnderExpressionList(expr_list.children[i]); + child = fuzzLiteralUnderExpressionList(child); } else - fuzz(expr_list.children[i]); + fuzz(child); } }
From a3f0546f48af77d7c120a7e71d94b992a4446e2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Tue, 30 Jan 2024 19:44:55 +0100
Subject: [PATCH 184/884] Handle both fuzzer.log and fuzzer.log.zst
--- tests/ci/ast_fuzzer_check.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py
index 41e4ef19361..95a887484f2 100644
--- a/tests/ci/ast_fuzzer_check.py
+++ b/tests/ci/ast_fuzzer_check.py
@@ -113,7 +113,6 @@ def main(): paths = { "run.log": run_log_path, "main.log": main_log_path, - "fuzzer.log": workspace_path / "fuzzer.log", "report.html": workspace_path / "report.html", "core.zst": workspace_path / "core.zst", "dmesg.log": workspace_path / "dmesg.log",
@@ -129,6 +128,14 @@ def main(): if not_compressed_server_log_path.exists(): paths["server.log"] = not_compressed_server_log_path + # Same idea but with the fuzzer log + compressed_fuzzer_log_path = workspace_path / "fuzzer.log.zst" + if compressed_fuzzer_log_path.exists(): + paths["fuzzer.log.zst"] = compressed_fuzzer_log_path + not_compressed_fuzzer_log_path = workspace_path / "fuzzer.log" + if not_compressed_fuzzer_log_path.exists(): + paths["fuzzer.log"] = not_compressed_fuzzer_log_path + # Try to get status 
message saved by the fuzzer try: with open(workspace_path / "status.txt", "r", encoding="utf-8") as status_f: From 4f0c78d66557bd74d21796ce2ea661132c26abc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 30 Jan 2024 20:25:26 +0100 Subject: [PATCH 185/884] Upload one file. Save the planet --- docker/test/fuzzer/run-fuzzer.sh | 4 ++-- tests/ci/ast_fuzzer_check.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 050d4b68628..ca6bff9c6be 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -389,8 +389,8 @@ fi rg --text -F '' server.log > fatal.log ||: dmesg -T > dmesg.log ||: -zstd --threads=0 server.log -zstd --threads=0 fuzzer.log +zstd --threads=0 --rm server.log +zstd --threads=0 --rm fuzzer.log cat > report.html < diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 95a887484f2..26ce7f5140b 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -121,20 +121,20 @@ def main(): compressed_server_log_path = workspace_path / "server.log.zst" if compressed_server_log_path.exists(): paths["server.log.zst"] = compressed_server_log_path - - # The script can fail before the invocation of `zstd`, but we are still interested in its log: - - not_compressed_server_log_path = workspace_path / "server.log" - if not_compressed_server_log_path.exists(): - paths["server.log"] = not_compressed_server_log_path + else: + # The script can fail before the invocation of `zstd`, but we are still interested in its log: + not_compressed_server_log_path = workspace_path / "server.log" + if not_compressed_server_log_path.exists(): + paths["server.log"] = not_compressed_server_log_path # Same idea but with the fuzzer log compressed_fuzzer_log_path = workspace_path / "fuzzer.log.zst" if compressed_fuzzer_log_path.exists(): paths["fuzzer.log.zst"] = compressed_fuzzer_log_path - not_compressed_fuzzer_log_path = workspace_path / "fuzzer.log" - if not_compressed_fuzzer_log_path.exists(): - paths["fuzzer.log"] = not_compressed_fuzzer_log_path + else: + not_compressed_fuzzer_log_path = workspace_path / "fuzzer.log" + if not_compressed_fuzzer_log_path.exists(): + paths["fuzzer.log"] = not_compressed_fuzzer_log_path # Try to get status message saved by the fuzzer try: From 17ab2674f4c8ad7a09194659e0a0c86d4440f203 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 30 Jan 2024 20:35:10 +0100 Subject: [PATCH 186/884] impl --- src/Common/ElapsedTimeProfileEventIncrement.h | 3 +- src/Common/ProfileEvents.cpp | 7 +++ .../MergeTreeDataPartWriterOnDisk.cpp | 15 ++++++ .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 47 ++++++++++++++----- 5 files changed, 60 insertions(+), 14 deletions(-) diff --git a/src/Common/ElapsedTimeProfileEventIncrement.h b/src/Common/ElapsedTimeProfileEventIncrement.h index b30afd24a4c..731295a4cfd 100644 --- a/src/Common/ElapsedTimeProfileEventIncrement.h +++ b/src/Common/ElapsedTimeProfileEventIncrement.h @@ -14,12 +14,13 @@ enum Time Seconds, }; -template